mirror of
https://github.com/wassname/pi-lgtm.git
synced 2026-06-27 16:46:17 +08:00
feat: two-tier task model — trivial tasks self-complete, lgtm gates significant claims
Reviewer feedback: the LGTM extension's epistemic core is good, but the UX is too ceremonial — every task was forced through lgtm_ask + /lgtm, even bookkeeping like "monitor pueue 30".

Two-tier split:
- Tasks: agent-managed. TaskUpdate(status=completed) is now allowed when no lgtm evidence is stored. Trivial subtasks lead up to verification without ceremony.
- LGTMs: significant claims. lgtm_ask still triggers robot review; once evidence is stored, completion is locked to /lgtm so the gate can't be bypassed.

Other UX:
- TaskList output is grouped: Active / Awaiting sign-off / Pending / Completed.
- New getDisplayStatus(task) derives awaiting_signoff from pending_approval.
- Widget header shows the count of tasks awaiting sign-off ("N awaiting sign-off").
- /lgtm accepts multiple ids: /lgtm 1 2 3 (also accepts #1 and comma-separated ids).
- lgtm_ask field descriptions encourage one short sentence per field — keep the thinking discipline, drop the verbosity.
- SYSTEM_REMINDER nudges progress updates and cleanup of completed/irrelevant tasks, not just lgtm_ask.

Also includes the pending rubric extension on RobotReviewRecord.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
+26
-10
@@ -1,5 +1,5 @@
|
||||
import { describe, expect, it } from "vitest";
|
||||
import { getReviewBadges, REVIEW_BADGES } from "../src/review-badges.js";
|
||||
import { getDisplayStatus, getReviewBadges } from "../src/review-badges.js";
|
||||
import type { Task } from "../src/types.js";
|
||||
|
||||
function makeTask(overrides: Partial<Task> = {}): Task {
|
||||
@@ -21,11 +21,11 @@ function makeTask(overrides: Partial<Task> = {}): Task {
|
||||
}
|
||||
|
||||
describe("getReviewBadges", () => {
|
||||
it("returns no badges when no review artifacts exist", () => {
|
||||
expect(getReviewBadges(makeTask())).toEqual([]);
|
||||
it("renders all dots when no artifacts exist", () => {
|
||||
expect(getReviewBadges(makeTask())).toBe("[···]");
|
||||
});
|
||||
|
||||
it("returns tool, robot, and human badges independently", () => {
|
||||
it("fills tool/robot/human slots independently", () => {
|
||||
const task = makeTask({
|
||||
pending_approval: true,
|
||||
metadata: {
|
||||
@@ -46,11 +46,7 @@ describe("getReviewBadges", () => {
|
||||
},
|
||||
});
|
||||
|
||||
expect(getReviewBadges(task)).toEqual([
|
||||
REVIEW_BADGES.tool,
|
||||
REVIEW_BADGES.robot,
|
||||
REVIEW_BADGES.human,
|
||||
]);
|
||||
expect(getReviewBadges(task)).toBe("[🛠🤖👀]");
|
||||
});
|
||||
|
||||
it("hides the human badge once the task is completed", () => {
|
||||
@@ -60,6 +56,26 @@ describe("getReviewBadges", () => {
|
||||
metadata: { lgtm_evidence: "ok" },
|
||||
});
|
||||
|
||||
expect(getReviewBadges(task)).toEqual([REVIEW_BADGES.tool]);
|
||||
expect(getReviewBadges(task)).toBe("[🛠··]");
|
||||
});
|
||||
});
|
||||
|
||||
describe("getDisplayStatus", () => {
|
||||
it("returns pending for fresh tasks", () => {
|
||||
expect(getDisplayStatus(makeTask())).toBe("pending");
|
||||
});
|
||||
|
||||
it("returns in_progress for active tasks not yet escalated", () => {
|
||||
expect(getDisplayStatus(makeTask({ status: "in_progress" }))).toBe("in_progress");
|
||||
});
|
||||
|
||||
it("returns awaiting_signoff when pending_approval is set", () => {
|
||||
expect(getDisplayStatus(makeTask({ status: "in_progress", pending_approval: true })))
|
||||
.toBe("awaiting_signoff");
|
||||
});
|
||||
|
||||
it("returns completed regardless of pending_approval flag", () => {
|
||||
expect(getDisplayStatus(makeTask({ status: "completed", pending_approval: true })))
|
||||
.toBe("completed");
|
||||
});
|
||||
});
|
||||
|
||||
@@ -76,3 +76,4 @@ describe("robot review helpers", () => {
|
||||
expect(task2.metadata.robot_review_iteration_count).toBe(2);
|
||||
});
|
||||
});
|
||||
|
||||
|
||||
+19
-3
@@ -168,9 +168,25 @@ describe("TaskStore (in-memory)", () => {
|
||||
expect(store.list()[0].id).toBe("2");
|
||||
});
|
||||
|
||||
it("throws on update status=completed (must use /lgtm)", () => {
|
||||
store.create("Test", "Desc", "done");
|
||||
expect(() => store.update("1", { status: "completed" as any })).toThrow("Use /lgtm");
|
||||
it("allows TaskUpdate(status=completed) for trivial tasks (no lgtm evidence)", () => {
|
||||
store.create("Trivial", "Desc", "done");
|
||||
const { task, changedFields } = store.update("1", { status: "completed" });
|
||||
expect(task!.status).toBe("completed");
|
||||
expect(changedFields).toContain("status");
|
||||
});
|
||||
|
||||
it("blocks TaskUpdate(status=completed) when pending_approval=true", () => {
|
||||
store.create("Significant", "Desc", "done");
|
||||
store.update("1", { pending_approval: true });
|
||||
expect(() => store.update("1", { status: "completed" })).toThrow("/lgtm");
|
||||
});
|
||||
|
||||
it("blocks TaskUpdate(status=completed) when lgtm evidence is stored (even if review rejected)", () => {
|
||||
store.create("Escalated", "Desc", "done");
|
||||
// lgtm_ask path stores evidence; if robot review rejects, pending_approval becomes false.
|
||||
// The agent must not be able to bypass the gate by self-completing afterwards.
|
||||
store.update("1", { metadata: { lgtm_evidence: "literal output" }, pending_approval: false });
|
||||
expect(() => store.update("1", { status: "completed" })).toThrow("/lgtm");
|
||||
});
|
||||
|
||||
it("returns not found for update on non-existent task", () => {
|
||||
|
||||
Reference in New Issue
Block a user