Files
pi-lgtm/test/robot-review.test.ts
T
wassname 5b800653a3 feat: two-tier task model — trivial tasks self-complete, lgtm gates significant claims
Reviewer feedback: the LGTM extension's epistemic core is good but UX is too
ceremonial — every task forced through lgtm_ask + /lgtm even bookkeeping like
"monitor pueue 30". Two-tier split:

- Tasks: agent-managed. TaskUpdate(status=completed) now allowed when no lgtm
  evidence is stored. Trivial subtasks lead up to verification without ceremony.
- LGTMs: significant claims. lgtm_ask still triggers robot review; once evidence
  is stored, completion is locked to /lgtm so the gate can't be bypassed.

Other UX:
- TaskList output grouped: Active / Awaiting sign-off / Pending / Completed.
- New getDisplayStatus(task) derives awaiting_signoff from pending_approval.
- Widget header shows N awaiting sign-off count.
- /lgtm accepts multiple ids: /lgtm 1 2 3 (also #1, commas).
- lgtm_ask field descriptions encourage one short sentence per field — keep
  thinking discipline, drop verbosity.
- SYSTEM_REMINDER nudges progress updates and cleanup of completed/irrelevant
  tasks, not just lgtm_ask.

Also includes pending rubric extension on RobotReviewRecord.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-04-25 18:18:48 +08:00

80 lines
2.6 KiB
TypeScript

import { describe, expect, it } from "vitest";
import { appendRobotReviewMetadata, getLatestRobotReview, getRobotReviews } from "../src/robot-review.js";
import type { Task } from "../src/types.js";
/**
 * Builds a `Task` fixture for the tests in this file.
 *
 * Every field receives a fixed default value; any key present in `overrides`
 * replaces the corresponding default. The defaults object is rebuilt on each
 * call, so the `metadata`, `blocks` and `blockedBy` containers are never
 * shared between fixtures.
 *
 * @param overrides - Subset of task fields that should differ from the defaults.
 * @returns The default fixture with `overrides` applied on top.
 */
function makeTask(overrides: Partial<Task> = {}): Task {
  const defaults: Task = {
    id: "1",
    subject: "Test",
    description: "Desc",
    done_criterion: "done",
    pending_approval: false,
    status: "pending",
    progress_label: undefined,
    metadata: {},
    blocks: [],
    blockedBy: [],
    createdAt: 0,
    updatedAt: 0,
  };

  // Later spreads win, so caller-supplied fields take precedence over defaults.
  return { ...defaults, ...overrides };
}
// Exercises the robot-review metadata helpers: reading a review stored as flat
// `robot_review_*` metadata keys, and appending reviews one after another.
describe("robot review helpers", () => {
  it("reads legacy single-review metadata", () => {
    // A single review expressed as flat `robot_review_*` keys on the task metadata.
    const legacyTask = makeTask({
      metadata: {
        robot_review_reviewer: "opencode",
        robot_review_scope: "task evidence",
        robot_review_observations: ["Observed no command output for the core claim"],
        robot_review_blind_spots: "Did not rerun tests",
        robot_review_submitted_at: "2026-04-17T00:00:00.000Z",
      },
    });

    const parsed = getRobotReviews(legacyTask);

    // Expected: exactly one review, reported as iteration 1 and as accepted.
    expect(parsed).toHaveLength(1);
    expect(parsed[0].reviewer).toBe("opencode");
    expect(parsed[0].iteration).toBe(1);
    expect(parsed[0].accepted).toBe(true);
  });

  it("appends robot reviews as iterations", () => {
    // First pass: a rejecting review appended to a task with empty metadata.
    const afterFirstReview = makeTask({
      metadata: appendRobotReviewMetadata(makeTask(), {
        reviewer: "opencode",
        scope: "task evidence",
        observations: ["Observed missing benchmark output"],
        blind_spots: "Did not inspect prod config",
        accepted: false,
        evidence_complete: false,
        evidence_convincing: false,
        missing_evidence: ["Benchmark output for the claimed speedup"],
        submitted_at: "2026-04-17T00:00:00.000Z",
        mode: "auto",
      }),
    });

    // Second pass: an accepting review appended on top of the first one's metadata.
    const afterSecondReview = makeTask({
      metadata: appendRobotReviewMetadata(afterFirstReview, {
        reviewer: "opencode",
        scope: "updated task evidence",
        observations: ["Observed benchmark output and test transcript"],
        blind_spots: "Did not inspect long-run stability",
        accepted: true,
        evidence_complete: true,
        evidence_convincing: true,
        missing_evidence: [],
        submitted_at: "2026-04-17T01:00:00.000Z",
        mode: "auto",
      }),
    });

    const history = getRobotReviews(afterSecondReview);

    // Both reviews are returned, numbered in the order they were appended.
    expect(history).toHaveLength(2);
    expect(history[0].iteration).toBe(1);
    expect(history[1].iteration).toBe(2);

    // The latest review is the second one, and the stored iteration count is 2.
    const latest = getLatestRobotReview(afterSecondReview);
    expect(latest?.evidence_convincing).toBe(true);
    expect(afterSecondReview.metadata.robot_review_iteration_count).toBe(2);
  });
});