feat: two-tier task model — trivial tasks self-complete, lgtm gates significant claims

Reviewer feedback: the LGTM extension's epistemic core is good, but the UX is too
ceremonial — every task was forced through lgtm_ask + /lgtm, even bookkeeping like
"monitor pueue 30". This commit splits the model into two tiers:

- Tasks: agent-managed. TaskUpdate(status=completed) is now allowed when no lgtm
  evidence is stored and the task is not pending approval, so trivial subtasks
  can lead up to verification without ceremony.
- LGTMs: significant claims. lgtm_ask still triggers robot review; once evidence
  is stored, completion is locked to /lgtm so the gate can't be bypassed.

Other UX:
- TaskList output grouped: Active / Awaiting sign-off / Pending / Completed.
- New getDisplayStatus(task) derives awaiting_signoff from pending_approval; a
  completed task still reports completed even if the flag is set.
- Widget header shows the number of tasks awaiting sign-off.
- /lgtm accepts multiple ids: /lgtm 1 2 3 (also #1, commas).
- lgtm_ask field descriptions encourage one short sentence per field — keep
  thinking discipline, drop verbosity.
- SYSTEM_REMINDER nudges progress updates and cleanup of completed/irrelevant
  tasks, not just lgtm_ask.

Also includes pending rubric extension on RobotReviewRecord.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
wassname
2026-04-25 18:18:48 +08:00
parent d908f6f617
commit 5b800653a3
9 changed files with 315 additions and 116 deletions
+26 -10
View File
@@ -1,5 +1,5 @@
import { describe, expect, it } from "vitest";
import { getReviewBadges, REVIEW_BADGES } from "../src/review-badges.js";
import { getDisplayStatus, getReviewBadges } from "../src/review-badges.js";
import type { Task } from "../src/types.js";
function makeTask(overrides: Partial<Task> = {}): Task {
@@ -21,11 +21,11 @@ function makeTask(overrides: Partial<Task> = {}): Task {
}
describe("getReviewBadges", () => {
it("returns no badges when no review artifacts exist", () => {
expect(getReviewBadges(makeTask())).toEqual([]);
it("renders all dots when no artifacts exist", () => {
expect(getReviewBadges(makeTask())).toBe("[···]");
});
it("returns tool, robot, and human badges independently", () => {
it("fills tool/robot/human slots independently", () => {
const task = makeTask({
pending_approval: true,
metadata: {
@@ -46,11 +46,7 @@ describe("getReviewBadges", () => {
},
});
expect(getReviewBadges(task)).toEqual([
REVIEW_BADGES.tool,
REVIEW_BADGES.robot,
REVIEW_BADGES.human,
]);
expect(getReviewBadges(task)).toBe("[🛠🤖👀]");
});
it("hides the human badge once the task is completed", () => {
@@ -60,6 +56,26 @@ describe("getReviewBadges", () => {
metadata: { lgtm_evidence: "ok" },
});
expect(getReviewBadges(task)).toEqual([REVIEW_BADGES.tool]);
expect(getReviewBadges(task)).toBe("[🛠··]");
});
});
// Pins the display-status label that getDisplayStatus derives from a task's
// `status` and `pending_approval` fields.
describe("getDisplayStatus", () => {
// A task built from makeTask() with no overrides is expected to display as "pending".
it("returns pending for fresh tasks", () => {
expect(getDisplayStatus(makeTask())).toBe("pending");
});
// status=in_progress with no pending_approval override passes through unchanged.
it("returns in_progress for active tasks not yet escalated", () => {
expect(getDisplayStatus(makeTask({ status: "in_progress" }))).toBe("in_progress");
});
// pending_approval=true on an in-progress task switches the label to "awaiting_signoff".
it("returns awaiting_signoff when pending_approval is set", () => {
expect(getDisplayStatus(makeTask({ status: "in_progress", pending_approval: true })))
.toBe("awaiting_signoff");
});
// status=completed takes precedence: the pending_approval flag does not change the label.
it("returns completed regardless of pending_approval flag", () => {
expect(getDisplayStatus(makeTask({ status: "completed", pending_approval: true })))
.toBe("completed");
});
});
+1
View File
@@ -76,3 +76,4 @@ describe("robot review helpers", () => {
expect(task2.metadata.robot_review_iteration_count).toBe(2);
});
});
+19 -3
View File
@@ -168,9 +168,25 @@ describe("TaskStore (in-memory)", () => {
expect(store.list()[0].id).toBe("2");
});
it("throws on update status=completed (must use /lgtm)", () => {
store.create("Test", "Desc", "done");
expect(() => store.update("1", { status: "completed" as any })).toThrow("Use /lgtm");
// Trivial path: a task that was only created (this test sets neither
// pending_approval nor lgtm evidence) can be completed directly via update().
it("allows TaskUpdate(status=completed) for trivial tasks (no lgtm evidence)", () => {
store.create("Trivial", "Desc", "done");
const { task, changedFields } = store.update("1", { status: "completed" });
// The returned task reflects the new status, and "status" is reported as changed.
expect(task!.status).toBe("completed");
expect(changedFields).toContain("status");
});
// Gate: once pending_approval is true, update() must refuse status=completed
// with an error whose message mentions "/lgtm".
it("blocks TaskUpdate(status=completed) when pending_approval=true", () => {
store.create("Significant", "Desc", "done");
store.update("1", { pending_approval: true });
expect(() => store.update("1", { status: "completed" })).toThrow("/lgtm");
});
// Gate: stored lgtm evidence alone locks completion, even with pending_approval=false;
// update() must throw an error whose message mentions "/lgtm".
it("blocks TaskUpdate(status=completed) when lgtm evidence is stored (even if review rejected)", () => {
store.create("Escalated", "Desc", "done");
// lgtm_ask path stores evidence; if robot review rejects, pending_approval becomes false.
// The agent must not be able to bypass the gate by self-completing afterwards.
store.update("1", { metadata: { lgtm_evidence: "literal output" }, pending_approval: false });
expect(() => store.update("1", { status: "completed" })).toThrow("/lgtm");
});
it("returns not found for update on non-existent task", () => {