mirror of
https://github.com/wassname/pi-lgtm.git
synced 2026-06-27 17:01:35 +08:00
fix: harden pi robot review harness
This commit is contained in:
@@ -0,0 +1,53 @@
|
||||
# Robot Review Lane
|
||||
|
||||
## Goal
|
||||
Add a separate review artifact for fresh-perspective subagent review without weakening the existing human `/lgtm` sign-off gate.
|
||||
|
||||
## Scope
|
||||
In: task schema, task tool UX, widget/task badges, README/tests, Pi-native robot-review harness hardening.
|
||||
Out: provider routing or third-party subagent package integration.
|
||||
|
||||
## Requirements
|
||||
- R1: Tasks can store a robot review separately from human sign-off evidence. Done means: a task can contain both `lgtm_ask` evidence and robot observations without conflict.
|
||||
- R2: Robot review is observational only. Done means: the tool schema and help text ask for observations, not recommendations or editorial.
|
||||
- R3: UI exposes review lanes distinctly. Done means: task list/widget/details show tool/robot/human review badges.
|
||||
- R4: Human `/lgtm` remains the only completion path. Done means: no robot review path can complete tasks.
|
||||
- R5: Automatic Pi robot review must be operationally robust. Done means: the child reviewer has a bounded timeout, uses a deterministic Pi invocation, and reports infra failures clearly enough to diagnose without hanging the main tool call.
|
||||
- R6: The subprocess harness is covered by focused tests. Done means: there are tests for invocation selection, timeout/abort behavior, and assistant-output parsing.
|
||||
|
||||
## Tasks
|
||||
- [x] T1 (R1, R2, R3, R4): Add robot-review storage and tool.
|
||||
- steps: update task typing helpers, register `robot_review_ask`, thread robot review metadata through task views
|
||||
- verify: `npm test -- --runInBand`
|
||||
- UAT: "when a task has lgtm evidence and robot observations, I observe both badges and `/lgtm` still controls completion"
|
||||
- [x] T2 (R3): Update README and examples.
|
||||
- steps: document badges and robot review workflow
|
||||
- verify: `rg -n "robot_review_ask|🤖|🛠" README.md`
|
||||
- UAT: "when I read the README, I observe a distinct robot review lane"
|
||||
- [x] T3 (R5): Harden the Pi-native robot-review subprocess runner.
|
||||
- steps: add timeout handling, replace fragile self-reinvocation logic with deterministic command resolution, improve failure messages
|
||||
- verify: `npx vitest run test/robot-review-runner.test.ts`
|
||||
- UAT: "when the child reviewer hangs or pi is not resolvable, lgtm_ask returns a bounded failure instead of hanging forever"
|
||||
- [x] T4 (R6): Add focused harness tests.
|
||||
- steps: extract/mock subprocess runner boundaries and cover timeout, parse, and command resolution behavior
|
||||
- verify: `npx vitest run test/robot-review-runner.test.ts test/robot-review.test.ts`
|
||||
- UAT: "when I run the focused tests, I observe the subprocess path itself is covered"
|
||||
|
||||
## Context
|
||||
- Existing schema uses `pending_approval` as the human sign-off gate.
|
||||
- Current UI already appends `👀` for pending human sign-off; extend rather than replace the completion rule.
|
||||
|
||||
## Log
|
||||
- The least disruptive model is additive metadata plus badge rendering, not replacing the task lifecycle.
|
||||
- The repo's full Vitest suite already has drift unrelated to this feature, so focused verification is needed to isolate new behavior.
|
||||
- A Pi-native reviewer stage matches the official subagent example better than ACP/external CLIs, but it makes harness reliability part of the approval path and therefore needs explicit timeout and invocation hardening.
|
||||
- A deterministic `pi` command plus an explicit timeout is simpler and more portable than trying to reconstruct the current host entrypoint from `process.argv`.
|
||||
|
||||
## TODO
|
||||
- Optional future work: add an orchestrated cross-model reviewer via `external-review` or ACP.
|
||||
|
||||
## Errors
|
||||
| Task | Error | Resolution |
|
||||
|------|-------|------------|
|
||||
| T1 | `npm test` failed with 17 pre-existing assertions unrelated to robot review | Verified with `npm run lint`, `npm run typecheck`, and focused `npx vitest run test/review-badges.test.ts` instead |
|
||||
| T3 | `process.argv[1]`-based self-reinvocation was fragile in extension-hosted contexts | Replaced with `PI_LGTM_PI_BIN` override or plain `pi`, then added focused runner tests |
|
||||
+98
-62
@@ -38,15 +38,105 @@ function textResult(msg: string) {
|
||||
const TASK_TOOL_NAMES = new Set(["TaskCreate", "TaskList", "TaskGet", "TaskUpdate", "lgtm_ask", "robot_review_ask", "robot_review_run"]);
|
||||
const REMINDER_INTERVAL = 4;
|
||||
const AUTO_CLEAR_DELAY = 4;
|
||||
export const DEFAULT_ROBOT_REVIEW_TIMEOUT_MS = 120_000;
|
||||
|
||||
type CommandResult = { stdout: string; stderr: string; exitCode: number | null };
|
||||
|
||||
function getPiInvocation(args: string[]): { command: string; args: string[] } {
|
||||
const currentScript = process.argv[1];
|
||||
if (currentScript) {
|
||||
return { command: process.execPath, args: [currentScript, ...args] };
|
||||
export function getPiInvocation(
|
||||
args: string[],
|
||||
env: NodeJS.ProcessEnv = process.env,
|
||||
): { command: string; args: string[] } {
|
||||
const configured = env.PI_LGTM_PI_BIN?.trim();
|
||||
return { command: configured || "pi", args };
|
||||
}
|
||||
|
||||
export function getRobotReviewTimeoutMs(env: NodeJS.ProcessEnv = process.env): number {
|
||||
const configured = Number.parseInt(env.PI_LGTM_ROBOT_REVIEW_TIMEOUT_MS ?? "", 10);
|
||||
return Number.isFinite(configured) && configured > 0 ? configured : DEFAULT_ROBOT_REVIEW_TIMEOUT_MS;
|
||||
}
|
||||
|
||||
function getAssistantTextFromPiEvent(event: any): string | undefined {
|
||||
if (event?.type !== "message_end" || event.message?.role !== "assistant" || !Array.isArray(event.message.content)) {
|
||||
return undefined;
|
||||
}
|
||||
return { command: "pi", args };
|
||||
const text = event.message.content.find((part: any) => part?.type === "text")?.text;
|
||||
return typeof text === "string" ? text : undefined;
|
||||
}
|
||||
|
||||
export function extractFinalAssistantTextFromPiJsonl(output: string): string {
|
||||
let buffer = "";
|
||||
let finalAssistantText = "";
|
||||
const lines = output.split("\n");
|
||||
for (const line of lines) {
|
||||
if (!line.trim()) continue;
|
||||
buffer = line;
|
||||
try {
|
||||
const text = getAssistantTextFromPiEvent(JSON.parse(line));
|
||||
if (text) finalAssistantText = text;
|
||||
buffer = "";
|
||||
} catch {
|
||||
// ignore malformed line noise from the child process
|
||||
}
|
||||
}
|
||||
if (buffer.trim()) {
|
||||
try {
|
||||
const text = getAssistantTextFromPiEvent(JSON.parse(buffer));
|
||||
if (text) finalAssistantText = text;
|
||||
} catch {
|
||||
// ignore malformed trailing line
|
||||
}
|
||||
}
|
||||
return finalAssistantText;
|
||||
}
|
||||
|
||||
export async function runRobotReviewCommand(
|
||||
invocation: { command: string; args: string[] },
|
||||
signal?: AbortSignal,
|
||||
timeoutMs = getRobotReviewTimeoutMs(),
|
||||
): Promise<CommandResult> {
|
||||
return new Promise<CommandResult>((resolve, reject) => {
|
||||
const child = spawn(invocation.command, invocation.args, { shell: false, stdio: ["ignore", "pipe", "pipe"] });
|
||||
const stdoutChunks: Buffer[] = [];
|
||||
const stderrChunks: Buffer[] = [];
|
||||
let settled = false;
|
||||
|
||||
const finish = (fn: () => void) => {
|
||||
if (settled) return;
|
||||
settled = true;
|
||||
fn();
|
||||
};
|
||||
|
||||
const killTimer = setTimeout(() => {
|
||||
child.kill("SIGTERM");
|
||||
finish(() => reject(new Error(`Robot reviewer timed out after ${timeoutMs}ms.`)));
|
||||
}, timeoutMs);
|
||||
|
||||
child.stdout.on("data", (data) => stdoutChunks.push(data));
|
||||
child.stderr.on("data", (data) => stderrChunks.push(data));
|
||||
child.on("error", (err) => {
|
||||
clearTimeout(killTimer);
|
||||
finish(() => reject(err));
|
||||
});
|
||||
const onAbort = () => {
|
||||
clearTimeout(killTimer);
|
||||
child.kill("SIGTERM");
|
||||
};
|
||||
signal?.addEventListener("abort", onAbort, { once: true });
|
||||
child.on("close", (exitCode) => {
|
||||
clearTimeout(killTimer);
|
||||
signal?.removeEventListener("abort", onAbort);
|
||||
if (signal?.aborted) {
|
||||
finish(() => reject(new Error("aborted")));
|
||||
return;
|
||||
}
|
||||
const stdout = Buffer.concat(stdoutChunks).toString("utf-8");
|
||||
finish(() => resolve({
|
||||
stdout: extractFinalAssistantTextFromPiJsonl(stdout) || stdout,
|
||||
stderr: Buffer.concat(stderrChunks).toString("utf-8"),
|
||||
exitCode,
|
||||
}));
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
function extractRobotReviewJson(output: string): Record<string, unknown> {
|
||||
@@ -113,63 +203,9 @@ async function runAutomaticRobotReview(
|
||||
if (reviewerModel) args.push("--model", reviewerModel);
|
||||
args.push(prompt);
|
||||
const invocation = getPiInvocation(args);
|
||||
const commandLabel = `${invocation.command} ${args.slice(0, reviewerModel ? 6 : 4).join(" ")}`;
|
||||
const result = await new Promise<CommandResult>((resolve, reject) => {
|
||||
const child = spawn(invocation.command, invocation.args, { shell: false, stdio: ["ignore", "pipe", "pipe"] });
|
||||
const stdoutChunks: Buffer[] = [];
|
||||
const stderrChunks: Buffer[] = [];
|
||||
let buffer = "";
|
||||
let finalAssistantText = "";
|
||||
child.stdout.on("data", (data) => {
|
||||
stdoutChunks.push(data);
|
||||
buffer += data.toString("utf-8");
|
||||
const lines = buffer.split("\n");
|
||||
buffer = lines.pop() || "";
|
||||
for (const line of lines) {
|
||||
if (!line.trim()) continue;
|
||||
try {
|
||||
const event = JSON.parse(line) as any;
|
||||
if (event.type === "message_end" && event.message?.role === "assistant") {
|
||||
const text = Array.isArray(event.message.content)
|
||||
? event.message.content.find((part: any) => part.type === "text")?.text
|
||||
: undefined;
|
||||
if (typeof text === "string") finalAssistantText = text;
|
||||
}
|
||||
} catch {
|
||||
// ignore malformed line noise
|
||||
}
|
||||
}
|
||||
});
|
||||
child.stderr.on("data", (data) => stderrChunks.push(data));
|
||||
child.on("error", reject);
|
||||
const onAbort = () => child.kill();
|
||||
signal?.addEventListener("abort", onAbort, { once: true });
|
||||
child.on("close", (exitCode) => {
|
||||
signal?.removeEventListener("abort", onAbort);
|
||||
if (signal?.aborted) {
|
||||
reject(new Error("aborted"));
|
||||
return;
|
||||
}
|
||||
if (buffer.trim()) {
|
||||
try {
|
||||
const event = JSON.parse(buffer) as any;
|
||||
if (event.type === "message_end" && event.message?.role === "assistant") {
|
||||
const text = Array.isArray(event.message.content)
|
||||
? event.message.content.find((part: any) => part.type === "text")?.text
|
||||
: undefined;
|
||||
if (typeof text === "string") finalAssistantText = text;
|
||||
}
|
||||
} catch {
|
||||
// ignore malformed trailing line
|
||||
}
|
||||
}
|
||||
resolve({
|
||||
stdout: finalAssistantText || Buffer.concat(stdoutChunks).toString("utf-8"),
|
||||
stderr: Buffer.concat(stderrChunks).toString("utf-8"),
|
||||
exitCode,
|
||||
});
|
||||
});
|
||||
});
|
||||
const timeoutMs = getRobotReviewTimeoutMs();
|
||||
const commandLabel = `${invocation.command} ${invocation.args.slice(0, reviewerModel ? 6 : 4).join(" ")}`;
|
||||
const result = await runRobotReviewCommand(invocation, signal, timeoutMs);
|
||||
if (result.exitCode !== 0) {
|
||||
throw new Error(`Robot reviewer failed (${result.exitCode ?? "?"}): ${(result.stderr || result.stdout).trim()}`);
|
||||
}
|
||||
|
||||
@@ -0,0 +1,54 @@
|
||||
import { describe, expect, it } from "vitest";
|
||||
import {
|
||||
DEFAULT_ROBOT_REVIEW_TIMEOUT_MS,
|
||||
extractFinalAssistantTextFromPiJsonl,
|
||||
getPiInvocation,
|
||||
getRobotReviewTimeoutMs,
|
||||
runRobotReviewCommand,
|
||||
} from "../src/index.js";
|
||||
|
||||
describe("robot review runner helpers", () => {
|
||||
it("uses plain pi by default and allows override", () => {
|
||||
expect(getPiInvocation(["--mode", "json"], {} as NodeJS.ProcessEnv)).toEqual({
|
||||
command: "pi",
|
||||
args: ["--mode", "json"],
|
||||
});
|
||||
expect(getPiInvocation(["-p"], { PI_LGTM_PI_BIN: "/custom/pi" } as NodeJS.ProcessEnv)).toEqual({
|
||||
command: "/custom/pi",
|
||||
args: ["-p"],
|
||||
});
|
||||
});
|
||||
|
||||
it("parses the final assistant text from pi jsonl", () => {
|
||||
const output = [
|
||||
"{\"type\":\"message_update\"}",
|
||||
"{\"type\":\"message_end\",\"message\":{\"role\":\"assistant\",\"content\":[{\"type\":\"text\",\"text\":\"ROBOT_REVIEW_JSON_START {\\\"accepted\\\":true} ROBOT_REVIEW_JSON_END\"}]}}",
|
||||
].join("\n");
|
||||
expect(extractFinalAssistantTextFromPiJsonl(output)).toContain("ROBOT_REVIEW_JSON_START");
|
||||
});
|
||||
|
||||
it("uses configured timeout or falls back to default", () => {
|
||||
expect(getRobotReviewTimeoutMs({ PI_LGTM_ROBOT_REVIEW_TIMEOUT_MS: "2500" } as NodeJS.ProcessEnv)).toBe(2500);
|
||||
expect(getRobotReviewTimeoutMs({ PI_LGTM_ROBOT_REVIEW_TIMEOUT_MS: "bad" } as NodeJS.ProcessEnv)).toBe(DEFAULT_ROBOT_REVIEW_TIMEOUT_MS);
|
||||
});
|
||||
|
||||
it("times out bounded child commands", async () => {
|
||||
await expect(runRobotReviewCommand({
|
||||
command: process.execPath,
|
||||
args: ["-e", "setTimeout(() => {}, 1000)"],
|
||||
}, undefined, 25)).rejects.toThrow(/timed out/i);
|
||||
});
|
||||
|
||||
it("extracts assistant text from a child jsonl process", async () => {
|
||||
const script = [
|
||||
"process.stdout.write(JSON.stringify({type:'message_update'}) + '\\n');",
|
||||
"process.stdout.write(JSON.stringify({type:'message_end',message:{role:'assistant',content:[{type:'text',text:'ROBOT_REVIEW_JSON_START {\\\"accepted\\\":true,\\\"observations\\\":[\\\"ok\\\"]} ROBOT_REVIEW_JSON_END'}]}}) + '\\n');",
|
||||
].join("");
|
||||
const result = await runRobotReviewCommand({
|
||||
command: process.execPath,
|
||||
args: ["-e", script],
|
||||
}, undefined, 500);
|
||||
expect(result.exitCode).toBe(0);
|
||||
expect(result.stdout).toContain("ROBOT_REVIEW_JSON_END");
|
||||
});
|
||||
});
|
||||
Reference in New Issue
Block a user