fix: harden pi robot review harness

This commit is contained in:
wassname
2026-04-17 12:02:35 +08:00
parent d52596aad1
commit d908f6f617
3 changed files with 205 additions and 62 deletions
+53
View File
@@ -0,0 +1,53 @@
# Robot Review Lane
## Goal
Add a separate review artifact for fresh-perspective subagent review without weakening the existing human `/lgtm` sign-off gate.
## Scope
In: task schema, task tool UX, widget/task badges, README/tests, Pi-native robot-review harness hardening.
Out: provider routing or third-party subagent package integration.
## Requirements
- R1: Tasks can store a robot review separately from human sign-off evidence. Done means: a task can contain both `lgtm_ask` evidence and robot observations without conflict.
- R2: Robot review is observational only. Done means: the tool schema and help text ask for observations, not recommendations or editorial.
- R3: UI exposes review lanes distinctly. Done means: task list/widget/details show tool/robot/human review badges.
- R4: Human `/lgtm` remains the only completion path. Done means: no robot review path can complete tasks.
- R5: Automatic Pi robot review must be operationally robust. Done means: the child reviewer has a bounded timeout, uses a deterministic Pi invocation, and reports infra failures clearly enough to diagnose without hanging the main tool call.
- R6: The subprocess harness is covered by focused tests. Done means: there are tests for invocation selection, timeout/abort behavior, and assistant-output parsing.
## Tasks
- [x] T1 (R1, R2, R3, R4): Add robot-review storage and tool.
- steps: update task typing helpers, register `robot_review_ask`, thread robot review metadata through task views
- verify: `npm test -- --runInBand`
- UAT: "when a task has lgtm evidence and robot observations, I observe both badges and `/lgtm` still controls completion"
- [x] T2 (R3): Update README and examples.
- steps: document badges and robot review workflow
- verify: `rg -n "robot_review_ask|🤖|🛠" README.md`
- UAT: "when I read the README, I observe a distinct robot review lane"
- [x] T3 (R5): Harden the Pi-native robot-review subprocess runner.
- steps: add timeout handling, replace fragile self-reinvocation logic with deterministic command resolution, improve failure messages
- verify: `npx vitest run test/robot-review-runner.test.ts`
- UAT: "when the child reviewer hangs or pi is not resolvable, lgtm_ask returns a bounded failure instead of hanging forever"
- [x] T4 (R6): Add focused harness tests.
- steps: extract/mock subprocess runner boundaries and cover timeout, parse, and command resolution behavior
- verify: `npx vitest run test/robot-review-runner.test.ts test/robot-review.test.ts`
- UAT: "when I run the focused tests, I observe the subprocess path itself is covered"
## Context
- Existing schema uses `pending_approval` as the human sign-off gate.
- Current UI already appends `👀` for pending human sign-off; extend rather than replace the completion rule.
## Log
- The least disruptive model is additive metadata plus badge rendering, not replacing the task lifecycle.
- The repo's full Vitest suite already has drift unrelated to this feature, so focused verification is needed to isolate new behavior.
- A Pi-native reviewer stage matches the official subagent example better than ACP/external CLIs, but it makes harness reliability part of the approval path and therefore needs explicit timeout and invocation hardening.
- A deterministic `pi` command plus an explicit timeout is simpler and more portable than trying to reconstruct the current host entrypoint from `process.argv`.
## TODO
- Optional future work: add an orchestrated cross-model reviewer via `external-review` or ACP.
## Errors
| Task | Error | Resolution |
|------|-------|------------|
| T1 | `npm test` failed with 17 pre-existing assertions unrelated to robot review | Verified with `npm run lint`, `npm run typecheck`, and focused `npx vitest run test/review-badges.test.ts` instead |
| T3 | `process.argv[1]`-based self-reinvocation was fragile in extension-hosted contexts | Replaced with `PI_LGTM_PI_BIN` override or plain `pi`, then added focused runner tests |
+98 -62
View File
@@ -38,15 +38,105 @@ function textResult(msg: string) {
const TASK_TOOL_NAMES = new Set(["TaskCreate", "TaskList", "TaskGet", "TaskUpdate", "lgtm_ask", "robot_review_ask", "robot_review_run"]); const TASK_TOOL_NAMES = new Set(["TaskCreate", "TaskList", "TaskGet", "TaskUpdate", "lgtm_ask", "robot_review_ask", "robot_review_run"]);
const REMINDER_INTERVAL = 4; const REMINDER_INTERVAL = 4;
const AUTO_CLEAR_DELAY = 4; const AUTO_CLEAR_DELAY = 4;
export const DEFAULT_ROBOT_REVIEW_TIMEOUT_MS = 120_000;
type CommandResult = { stdout: string; stderr: string; exitCode: number | null }; type CommandResult = { stdout: string; stderr: string; exitCode: number | null };
function getPiInvocation(args: string[]): { command: string; args: string[] } { export function getPiInvocation(
const currentScript = process.argv[1]; args: string[],
if (currentScript) { env: NodeJS.ProcessEnv = process.env,
return { command: process.execPath, args: [currentScript, ...args] }; ): { command: string; args: string[] } {
const configured = env.PI_LGTM_PI_BIN?.trim();
return { command: configured || "pi", args };
}
export function getRobotReviewTimeoutMs(env: NodeJS.ProcessEnv = process.env): number {
const configured = Number.parseInt(env.PI_LGTM_ROBOT_REVIEW_TIMEOUT_MS ?? "", 10);
return Number.isFinite(configured) && configured > 0 ? configured : DEFAULT_ROBOT_REVIEW_TIMEOUT_MS;
}
function getAssistantTextFromPiEvent(event: any): string | undefined {
if (event?.type !== "message_end" || event.message?.role !== "assistant" || !Array.isArray(event.message.content)) {
return undefined;
} }
return { command: "pi", args }; const text = event.message.content.find((part: any) => part?.type === "text")?.text;
return typeof text === "string" ? text : undefined;
}
export function extractFinalAssistantTextFromPiJsonl(output: string): string {
let buffer = "";
let finalAssistantText = "";
const lines = output.split("\n");
for (const line of lines) {
if (!line.trim()) continue;
buffer = line;
try {
const text = getAssistantTextFromPiEvent(JSON.parse(line));
if (text) finalAssistantText = text;
buffer = "";
} catch {
// ignore malformed line noise from the child process
}
}
if (buffer.trim()) {
try {
const text = getAssistantTextFromPiEvent(JSON.parse(buffer));
if (text) finalAssistantText = text;
} catch {
// ignore malformed trailing line
}
}
return finalAssistantText;
}
export async function runRobotReviewCommand(
invocation: { command: string; args: string[] },
signal?: AbortSignal,
timeoutMs = getRobotReviewTimeoutMs(),
): Promise<CommandResult> {
return new Promise<CommandResult>((resolve, reject) => {
const child = spawn(invocation.command, invocation.args, { shell: false, stdio: ["ignore", "pipe", "pipe"] });
const stdoutChunks: Buffer[] = [];
const stderrChunks: Buffer[] = [];
let settled = false;
const finish = (fn: () => void) => {
if (settled) return;
settled = true;
fn();
};
const killTimer = setTimeout(() => {
child.kill("SIGTERM");
finish(() => reject(new Error(`Robot reviewer timed out after ${timeoutMs}ms.`)));
}, timeoutMs);
child.stdout.on("data", (data) => stdoutChunks.push(data));
child.stderr.on("data", (data) => stderrChunks.push(data));
child.on("error", (err) => {
clearTimeout(killTimer);
finish(() => reject(err));
});
const onAbort = () => {
clearTimeout(killTimer);
child.kill("SIGTERM");
};
signal?.addEventListener("abort", onAbort, { once: true });
child.on("close", (exitCode) => {
clearTimeout(killTimer);
signal?.removeEventListener("abort", onAbort);
if (signal?.aborted) {
finish(() => reject(new Error("aborted")));
return;
}
const stdout = Buffer.concat(stdoutChunks).toString("utf-8");
finish(() => resolve({
stdout: extractFinalAssistantTextFromPiJsonl(stdout) || stdout,
stderr: Buffer.concat(stderrChunks).toString("utf-8"),
exitCode,
}));
});
});
} }
function extractRobotReviewJson(output: string): Record<string, unknown> { function extractRobotReviewJson(output: string): Record<string, unknown> {
@@ -113,63 +203,9 @@ async function runAutomaticRobotReview(
if (reviewerModel) args.push("--model", reviewerModel); if (reviewerModel) args.push("--model", reviewerModel);
args.push(prompt); args.push(prompt);
const invocation = getPiInvocation(args); const invocation = getPiInvocation(args);
const commandLabel = `${invocation.command} ${args.slice(0, reviewerModel ? 6 : 4).join(" ")}`; const timeoutMs = getRobotReviewTimeoutMs();
const result = await new Promise<CommandResult>((resolve, reject) => { const commandLabel = `${invocation.command} ${invocation.args.slice(0, reviewerModel ? 6 : 4).join(" ")}`;
const child = spawn(invocation.command, invocation.args, { shell: false, stdio: ["ignore", "pipe", "pipe"] }); const result = await runRobotReviewCommand(invocation, signal, timeoutMs);
const stdoutChunks: Buffer[] = [];
const stderrChunks: Buffer[] = [];
let buffer = "";
let finalAssistantText = "";
child.stdout.on("data", (data) => {
stdoutChunks.push(data);
buffer += data.toString("utf-8");
const lines = buffer.split("\n");
buffer = lines.pop() || "";
for (const line of lines) {
if (!line.trim()) continue;
try {
const event = JSON.parse(line) as any;
if (event.type === "message_end" && event.message?.role === "assistant") {
const text = Array.isArray(event.message.content)
? event.message.content.find((part: any) => part.type === "text")?.text
: undefined;
if (typeof text === "string") finalAssistantText = text;
}
} catch {
// ignore malformed line noise
}
}
});
child.stderr.on("data", (data) => stderrChunks.push(data));
child.on("error", reject);
const onAbort = () => child.kill();
signal?.addEventListener("abort", onAbort, { once: true });
child.on("close", (exitCode) => {
signal?.removeEventListener("abort", onAbort);
if (signal?.aborted) {
reject(new Error("aborted"));
return;
}
if (buffer.trim()) {
try {
const event = JSON.parse(buffer) as any;
if (event.type === "message_end" && event.message?.role === "assistant") {
const text = Array.isArray(event.message.content)
? event.message.content.find((part: any) => part.type === "text")?.text
: undefined;
if (typeof text === "string") finalAssistantText = text;
}
} catch {
// ignore malformed trailing line
}
}
resolve({
stdout: finalAssistantText || Buffer.concat(stdoutChunks).toString("utf-8"),
stderr: Buffer.concat(stderrChunks).toString("utf-8"),
exitCode,
});
});
});
if (result.exitCode !== 0) { if (result.exitCode !== 0) {
throw new Error(`Robot reviewer failed (${result.exitCode ?? "?"}): ${(result.stderr || result.stdout).trim()}`); throw new Error(`Robot reviewer failed (${result.exitCode ?? "?"}): ${(result.stderr || result.stdout).trim()}`);
} }
+54
View File
@@ -0,0 +1,54 @@
import { describe, expect, it } from "vitest";
import {
DEFAULT_ROBOT_REVIEW_TIMEOUT_MS,
extractFinalAssistantTextFromPiJsonl,
getPiInvocation,
getRobotReviewTimeoutMs,
runRobotReviewCommand,
} from "../src/index.js";
describe("robot review runner helpers", () => {
it("uses plain pi by default and allows override", () => {
expect(getPiInvocation(["--mode", "json"], {} as NodeJS.ProcessEnv)).toEqual({
command: "pi",
args: ["--mode", "json"],
});
expect(getPiInvocation(["-p"], { PI_LGTM_PI_BIN: "/custom/pi" } as NodeJS.ProcessEnv)).toEqual({
command: "/custom/pi",
args: ["-p"],
});
});
it("parses the final assistant text from pi jsonl", () => {
const output = [
"{\"type\":\"message_update\"}",
"{\"type\":\"message_end\",\"message\":{\"role\":\"assistant\",\"content\":[{\"type\":\"text\",\"text\":\"ROBOT_REVIEW_JSON_START {\\\"accepted\\\":true} ROBOT_REVIEW_JSON_END\"}]}}",
].join("\n");
expect(extractFinalAssistantTextFromPiJsonl(output)).toContain("ROBOT_REVIEW_JSON_START");
});
it("uses configured timeout or falls back to default", () => {
expect(getRobotReviewTimeoutMs({ PI_LGTM_ROBOT_REVIEW_TIMEOUT_MS: "2500" } as NodeJS.ProcessEnv)).toBe(2500);
expect(getRobotReviewTimeoutMs({ PI_LGTM_ROBOT_REVIEW_TIMEOUT_MS: "bad" } as NodeJS.ProcessEnv)).toBe(DEFAULT_ROBOT_REVIEW_TIMEOUT_MS);
});
it("times out bounded child commands", async () => {
await expect(runRobotReviewCommand({
command: process.execPath,
args: ["-e", "setTimeout(() => {}, 1000)"],
}, undefined, 25)).rejects.toThrow(/timed out/i);
});
it("extracts assistant text from a child jsonl process", async () => {
const script = [
"process.stdout.write(JSON.stringify({type:'message_update'}) + '\\n');",
"process.stdout.write(JSON.stringify({type:'message_end',message:{role:'assistant',content:[{type:'text',text:'ROBOT_REVIEW_JSON_START {\\\"accepted\\\":true,\\\"observations\\\":[\\\"ok\\\"]} ROBOT_REVIEW_JSON_END'}]}}) + '\\n');",
].join("");
const result = await runRobotReviewCommand({
command: process.execPath,
args: ["-e", script],
}, undefined, 500);
expect(result.exitCode).toBe(0);
expect(result.stdout).toContain("ROBOT_REVIEW_JSON_END");
});
});