mirror of
https://github.com/wassname/pi-lgtm.git
synced 2026-06-27 15:16:15 +08:00
Gut robot review, rebuild as lean task/goal list with evidence sign-off
- Delete robot-review.ts, review-badges.ts, TaskClaimDone, robot_review_ask, robot_review_run, lgtm_supersede - Replace with 5 tools: TaskCreate, TaskList, TaskGet, TaskUpdate, TaskComplete - Goals have done_criterion + failure_mode, sign off via TaskComplete(evidence, failure_likely) - Subtasks have parentId, mark done via TaskUpdate - Plain tasks have just subject, mark done via TaskUpdate - TaskComplete spawns pi-subagents RPC sanity check (skips if unavailable) - Port reminder cadence from pi-tasks (injects reminder when agent idle for N turns) - Guard ctx?.ui and turnCtx?.ui against undefined - Net -3,477 lines (3,632 → 1,402 src lines)
This commit is contained in:
+377
-2315
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,66 @@
|
||||
/**
|
||||
* reminder-cadence.ts — Pure cadence logic for system-reminder injection.
|
||||
*
|
||||
* When the agent hasn't used task tools for N turns, inject a reminder
|
||||
* to keep working toward active goals. Ported from pi-tasks.
|
||||
*/
|
||||
|
||||
/**
 * Mutable bookkeeping for the reminder cadence.
 *
 * All fields start at `0` / `false` (see `createCadenceState`) and are
 * mutated in place by the functions in this module.
 */
export interface CadenceState {
  /** Turn counter; incremented once per call to `onTurnStart`. */
  currentTurn: number;
  /**
   * Value of `currentTurn` when a task tool result was last seen. Also moved
   * forward when a reminder is drained, which restarts the interval.
   */
  lastTaskToolUseTurn: number;
  /**
   * True once a reminder has been drained; cleared again when a task tool is
   * used. While true, `evaluateToolResult` will not raise another reminder.
   */
  reminderInjectedThisCycle: boolean;
  /** True when a reminder has been scheduled but not yet drained. */
  reminderDue: boolean;
}
|
||||
|
||||
/** Static settings consulted by `evaluateToolResult`. */
export interface CadenceConfig {
  /**
   * Number of turns that must have elapsed since `lastTaskToolUseTurn`
   * before a reminder can become due.
   */
  reminderInterval: number;
  /** Tool names that count as "task tool use" and reset the cadence. */
  taskToolNames: ReadonlySet<string>;
}
|
||||
|
||||
/**
 * Build a fresh cadence state: turn counters at zero, no reminder pending
 * and none injected yet.
 *
 * @returns A new, independent state object on every call.
 */
export function createCadenceState(): CadenceState {
  return {
    currentTurn: 0,
    lastTaskToolUseTurn: 0,
    reminderInjectedThisCycle: false,
    reminderDue: false,
  };
}
|
||||
|
||||
/**
 * Reset an existing state object in place to its initial values.
 *
 * Mutates `state` rather than returning a new object, so any holder of the
 * same reference sees the reset.
 *
 * @param state - The cadence state to clear.
 */
export function resetCadenceState(state: CadenceState): void {
  state.currentTurn = 0;
  state.lastTaskToolUseTurn = 0;
  state.reminderInjectedThisCycle = false;
  state.reminderDue = false;
}
|
||||
|
||||
/**
 * Advance the turn counter by one. No other field is touched.
 *
 * @param state - The cadence state to advance (mutated in place).
 */
export function onTurnStart(state: CadenceState): void {
  state.currentTurn++;
}
|
||||
|
||||
/**
 * Update the cadence after a tool result and decide whether a reminder
 * should be scheduled.
 *
 * A task tool (any name in `config.taskToolNames`) resets the cadence: the
 * last-use turn becomes the current turn and any pending or already-injected
 * reminder flag is cleared. For any other tool, `reminderDue` is set only
 * when all of the following hold:
 *  - at least `config.reminderInterval` turns have passed since
 *    `lastTaskToolUseTurn`,
 *  - no reminder has been injected since the last task tool use,
 *  - `hasTasks` is true.
 *
 * @param state - Cadence state, mutated in place.
 * @param toolName - Name of the tool whose result is being evaluated.
 * @param hasTasks - Whether there are tasks to remind about; when false no
 *   reminder is scheduled.
 * @param config - Interval and the set of task tool names.
 */
export function evaluateToolResult(
  state: CadenceState,
  toolName: string,
  hasTasks: boolean,
  config: CadenceConfig,
): void {
  if (config.taskToolNames.has(toolName)) {
    // Task tool use: restart the interval and cancel any reminder.
    state.lastTaskToolUseTurn = state.currentTurn;
    state.reminderInjectedThisCycle = false;
    state.reminderDue = false;
    return;
  }

  // Not enough turns since the last task tool use (or last drained reminder).
  if (state.currentTurn - state.lastTaskToolUseTurn < config.reminderInterval) return;
  // Only one reminder per cycle; a task tool use starts the next cycle.
  if (state.reminderInjectedThisCycle) return;
  if (!hasTasks) return;

  state.reminderDue = true;
}
|
||||
|
||||
/**
 * Consume a pending reminder, if any.
 *
 * When a reminder is due, the pending flag is cleared, the reminder is
 * recorded as injected for this cycle, and the interval is restarted from
 * the current turn. When nothing is due, the state is left untouched.
 *
 * @param state - Cadence state, mutated in place when a reminder is drained.
 * @returns `true` if a reminder was due (the caller should inject it),
 *   `false` otherwise.
 */
export function drainReminderForContext(state: CadenceState): boolean {
  if (!state.reminderDue) return false;

  state.reminderDue = false;
  state.reminderInjectedThisCycle = true;
  state.lastTaskToolUseTurn = state.currentTurn;
  return true;
}
|
||||
@@ -1,86 +0,0 @@
|
||||
import { getLatestRobotReview } from "./robot-review.js";
|
||||
import type { Task } from "./types.js";
|
||||
|
||||
function hasCurrentEvidence(task: Task): boolean {
|
||||
return (
|
||||
typeof task.metadata?.lgtm_evidence === "string" &&
|
||||
task.metadata.lgtm_evidence.length > 0
|
||||
);
|
||||
}
|
||||
|
||||
function hasEvidenceHistory(task: Task): boolean {
|
||||
return (
|
||||
Array.isArray(task.metadata?.lgtm_history) &&
|
||||
task.metadata.lgtm_history.length > 0
|
||||
);
|
||||
}
|
||||
|
||||
export type DisplayStatus = "in_progress" | "pending" | "completed";
|
||||
|
||||
export function getDisplayStatus(task: Task): DisplayStatus {
|
||||
return task.status;
|
||||
}
|
||||
|
||||
export type CompletionMode = "direct" | "proof";
|
||||
export type ReviewState =
|
||||
| "no_claim"
|
||||
| "claim_submitted"
|
||||
| "reviewer_failed_to_run"
|
||||
| "reviewer_rejected"
|
||||
| "reviewer_accepted"
|
||||
| "superseded"
|
||||
| "completed";
|
||||
export function getCompletionMode(task: Task): CompletionMode {
|
||||
return task.parentId ? "direct" : "proof";
|
||||
}
|
||||
|
||||
export function getReviewState(task: Task): ReviewState {
|
||||
if (task.status === "completed") return "completed";
|
||||
const latest = getLatestRobotReview(task);
|
||||
if (latest && !latest.accepted) return "reviewer_rejected";
|
||||
if (latest?.accepted) return "reviewer_accepted";
|
||||
if (typeof task.metadata?.robot_review_last_error === "string")
|
||||
return "reviewer_failed_to_run";
|
||||
if (hasCurrentEvidence(task)) return "claim_submitted";
|
||||
if (hasEvidenceHistory(task)) return "superseded";
|
||||
return "no_claim";
|
||||
}
|
||||
|
||||
export function needsProofAttention(task: Task): boolean {
|
||||
if (task.parentId || task.status === "completed") return false;
|
||||
const state = getReviewState(task);
|
||||
return (
|
||||
state === "reviewer_rejected" ||
|
||||
state === "reviewer_accepted" ||
|
||||
state === "reviewer_failed_to_run"
|
||||
);
|
||||
}
|
||||
|
||||
export function getGateStatus(task: Task): string {
|
||||
const state = getReviewState(task);
|
||||
if (task.parentId) {
|
||||
return task.status === "completed"
|
||||
? "completed directly as subtask"
|
||||
: "subtask: direct completion allowed";
|
||||
}
|
||||
if (task.status === "completed") {
|
||||
if (typeof task.metadata?.robot_review_last_error === "string") {
|
||||
return `completed with reviewer unavailable: ${task.metadata.robot_review_last_error}`;
|
||||
}
|
||||
if (getLatestRobotReview(task)?.accepted)
|
||||
return "completed after accepted proof review";
|
||||
return "completed";
|
||||
}
|
||||
if (state === "no_claim")
|
||||
return "top-level task requires TaskClaimDone evidence before completion";
|
||||
if (state === "reviewer_accepted")
|
||||
return "review accepted; task should be completed";
|
||||
if (state === "reviewer_failed_to_run") {
|
||||
return `review unavailable; autonomy continues: ${task.metadata.robot_review_last_error}`;
|
||||
}
|
||||
if (state === "reviewer_rejected")
|
||||
return "latest proof review rejected the evidence; strengthen the proof and try again";
|
||||
if (state === "superseded")
|
||||
return "current evidence superseded, waiting for a new proof claim";
|
||||
return "proof claim submitted, automatic review still required";
|
||||
}
|
||||
@@ -1,311 +0,0 @@
|
||||
import type { Task } from "./types.js";
|
||||
|
||||
export type RobotReviewMode = "manual" | "auto";
|
||||
|
||||
export interface RobotReviewRecord {
|
||||
iteration: number;
|
||||
reviewer: string;
|
||||
scope: string;
|
||||
reason?: string;
|
||||
observations: string[];
|
||||
concerns: string[];
|
||||
suggestions: string[];
|
||||
blind_spots: string;
|
||||
accepted: boolean;
|
||||
evidence_complete: boolean;
|
||||
evidence_convincing: boolean;
|
||||
missing_evidence: string[];
|
||||
submitted_at: string;
|
||||
mode: RobotReviewMode;
|
||||
raw_output?: string;
|
||||
rubric?: Record<string, { reason: string; pass: boolean }>;
|
||||
}
|
||||
|
||||
function toStringArray(value: unknown): string[] {
|
||||
return Array.isArray(value)
|
||||
? value.filter((item): item is string => typeof item === "string")
|
||||
: [];
|
||||
}
|
||||
|
||||
function extractRubric(
|
||||
value: unknown,
|
||||
): Record<string, { reason: string; pass: boolean }> | undefined {
|
||||
if (!value || typeof value !== "object") return undefined;
|
||||
const r: Record<string, { reason: string; pass: boolean }> = {};
|
||||
for (const [key, val] of Object.entries(value as Record<string, unknown>)) {
|
||||
if (
|
||||
val &&
|
||||
typeof val === "object" &&
|
||||
"reason" in (val as any) &&
|
||||
"pass" in (val as any)
|
||||
) {
|
||||
const v = val as { reason: unknown; pass: unknown };
|
||||
r[key] = {
|
||||
reason: typeof v.reason === "string" ? v.reason : "",
|
||||
pass: v.pass === true,
|
||||
};
|
||||
}
|
||||
}
|
||||
return Object.keys(r).length > 0 ? r : undefined;
|
||||
}
|
||||
|
||||
function normalizeReview(
|
||||
value: unknown,
|
||||
index: number,
|
||||
): RobotReviewRecord | undefined {
|
||||
if (!value || typeof value !== "object") return undefined;
|
||||
const review = value as Record<string, unknown>;
|
||||
const reviewer =
|
||||
typeof review.reviewer === "string" ? review.reviewer : "unknown";
|
||||
const scope = typeof review.scope === "string" ? review.scope : "unknown";
|
||||
const observations = toStringArray(review.observations);
|
||||
if (observations.length === 0) return undefined;
|
||||
return {
|
||||
iteration:
|
||||
typeof review.iteration === "number" ? review.iteration : index + 1,
|
||||
reviewer,
|
||||
scope,
|
||||
reason:
|
||||
typeof review.reason === "string" ? review.reason : undefined,
|
||||
observations,
|
||||
concerns: toStringArray(review.concerns),
|
||||
suggestions: toStringArray(review.suggestions),
|
||||
blind_spots:
|
||||
typeof review.blind_spots === "string"
|
||||
? review.blind_spots
|
||||
: "not recorded",
|
||||
accepted:
|
||||
typeof review.accepted === "boolean"
|
||||
? review.accepted
|
||||
: (typeof review.evidence_complete === "boolean"
|
||||
? review.evidence_complete
|
||||
: true) &&
|
||||
(typeof review.evidence_convincing === "boolean"
|
||||
? review.evidence_convincing
|
||||
: true),
|
||||
evidence_complete:
|
||||
typeof review.evidence_complete === "boolean"
|
||||
? review.evidence_complete
|
||||
: true,
|
||||
evidence_convincing:
|
||||
typeof review.evidence_convincing === "boolean"
|
||||
? review.evidence_convincing
|
||||
: true,
|
||||
missing_evidence: toStringArray(review.missing_evidence),
|
||||
submitted_at:
|
||||
typeof review.submitted_at === "string"
|
||||
? review.submitted_at
|
||||
: new Date(0).toISOString(),
|
||||
mode: review.mode === "auto" ? "auto" : "manual",
|
||||
raw_output:
|
||||
typeof review.raw_output === "string" ? review.raw_output : undefined,
|
||||
rubric: extractRubric(review.rubric),
|
||||
};
|
||||
}
|
||||
|
||||
function getLegacyRobotReview(task: Task): RobotReviewRecord | undefined {
|
||||
const observations = toStringArray(task.metadata?.robot_review_observations);
|
||||
if (observations.length === 0) return undefined;
|
||||
return {
|
||||
iteration: 1,
|
||||
reviewer:
|
||||
typeof task.metadata?.robot_review_reviewer === "string"
|
||||
? task.metadata.robot_review_reviewer
|
||||
: "unknown",
|
||||
scope:
|
||||
typeof task.metadata?.robot_review_scope === "string"
|
||||
? task.metadata.robot_review_scope
|
||||
: "unknown",
|
||||
reason:
|
||||
typeof task.metadata?.robot_review_reason === "string"
|
||||
? task.metadata.robot_review_reason
|
||||
: undefined,
|
||||
observations,
|
||||
concerns: toStringArray(task.metadata?.robot_review_concerns),
|
||||
suggestions: toStringArray(task.metadata?.robot_review_suggestions),
|
||||
blind_spots:
|
||||
typeof task.metadata?.robot_review_blind_spots === "string"
|
||||
? task.metadata.robot_review_blind_spots
|
||||
: "not recorded",
|
||||
accepted:
|
||||
typeof task.metadata?.robot_review_accepted === "boolean"
|
||||
? task.metadata.robot_review_accepted
|
||||
: (typeof task.metadata?.robot_review_evidence_complete === "boolean"
|
||||
? task.metadata.robot_review_evidence_complete
|
||||
: true) &&
|
||||
(typeof task.metadata?.robot_review_evidence_convincing === "boolean"
|
||||
? task.metadata.robot_review_evidence_convincing
|
||||
: true),
|
||||
evidence_complete:
|
||||
typeof task.metadata?.robot_review_evidence_complete === "boolean"
|
||||
? task.metadata.robot_review_evidence_complete
|
||||
: true,
|
||||
evidence_convincing:
|
||||
typeof task.metadata?.robot_review_evidence_convincing === "boolean"
|
||||
? task.metadata.robot_review_evidence_convincing
|
||||
: true,
|
||||
missing_evidence: toStringArray(
|
||||
task.metadata?.robot_review_missing_evidence,
|
||||
),
|
||||
submitted_at:
|
||||
typeof task.metadata?.robot_review_submitted_at === "string"
|
||||
? task.metadata.robot_review_submitted_at
|
||||
: new Date(0).toISOString(),
|
||||
mode: task.metadata?.robot_review_mode === "auto" ? "auto" : "manual",
|
||||
raw_output:
|
||||
typeof task.metadata?.robot_review_raw_output === "string"
|
||||
? task.metadata.robot_review_raw_output
|
||||
: undefined,
|
||||
};
|
||||
}
|
||||
|
||||
export function getRobotReviews(task: Task): RobotReviewRecord[] {
|
||||
const reviews = Array.isArray(task.metadata?.robot_reviews)
|
||||
? task.metadata.robot_reviews
|
||||
.map((review: unknown, index: number) => normalizeReview(review, index))
|
||||
.filter((review): review is RobotReviewRecord => review !== undefined)
|
||||
: [];
|
||||
if (reviews.length > 0) {
|
||||
return reviews.map((review, index) => ({
|
||||
...review,
|
||||
iteration: index + 1,
|
||||
}));
|
||||
}
|
||||
const legacy = getLegacyRobotReview(task);
|
||||
return legacy ? [legacy] : [];
|
||||
}
|
||||
|
||||
export function getLatestRobotReview(
|
||||
task: Task,
|
||||
): RobotReviewRecord | undefined {
|
||||
const reviews = getRobotReviews(task);
|
||||
return reviews.length > 0 ? reviews[reviews.length - 1] : undefined;
|
||||
}
|
||||
|
||||
function hasNonEmptyString(value: unknown): boolean {
|
||||
return typeof value === "string" && value.trim().length > 0;
|
||||
}
|
||||
|
||||
export function hasCompleteProofClaim(task: Task): boolean {
|
||||
const metadata = task.metadata ?? {};
|
||||
return (
|
||||
[
|
||||
metadata.lgtm_evidence,
|
||||
metadata.lgtm_failure_likely,
|
||||
metadata.lgtm_failure_sneaky,
|
||||
metadata.lgtm_failure_unknown,
|
||||
metadata.lgtm_falsification_test,
|
||||
metadata.lgtm_evidence_reasoning,
|
||||
metadata.lgtm_remaining_uncertainty,
|
||||
].every(hasNonEmptyString) &&
|
||||
Array.isArray(metadata.lgtm_verification_hints) &&
|
||||
metadata.lgtm_verification_hints.some(hasNonEmptyString)
|
||||
);
|
||||
}
|
||||
|
||||
export function shouldCompleteAfterAcceptedReview(
|
||||
task: Task,
|
||||
reviewAccepted: boolean,
|
||||
): boolean {
|
||||
return reviewAccepted && hasCompleteProofClaim(task);
|
||||
}
|
||||
|
||||
export function relaxAdvisoryVerificationHints(
|
||||
review: Omit<RobotReviewRecord, "iteration">,
|
||||
): Omit<RobotReviewRecord, "iteration"> {
|
||||
const rubric = review.rubric;
|
||||
if (!rubric || review.evidence_complete !== true) return review;
|
||||
const requiredCoreKeys = [
|
||||
"evidence_covers_done_criterion",
|
||||
"falsification_test_runnable",
|
||||
];
|
||||
if (!requiredCoreKeys.every((key) => rubric[key]?.pass === true))
|
||||
return review;
|
||||
const failedKeys = Object.entries(rubric)
|
||||
.filter(([, item]) => item.pass !== true)
|
||||
.map(([key]) => key);
|
||||
const advisoryKeys = [
|
||||
"failure_modes_addressed",
|
||||
"evidence_distinguishes_success",
|
||||
"verification_hints_actionable",
|
||||
];
|
||||
if (
|
||||
failedKeys.length === 0 ||
|
||||
!failedKeys.every((key) => advisoryKeys.includes(key))
|
||||
)
|
||||
return review;
|
||||
|
||||
const advisoryNotes: string[] = [];
|
||||
if (failedKeys.includes("failure_modes_addressed")) {
|
||||
advisoryNotes.push(
|
||||
"Failure-mode writeup was weak, but treated as advisory because the verbatim evidence already covered the done criterion.",
|
||||
);
|
||||
}
|
||||
if (failedKeys.includes("evidence_distinguishes_success")) {
|
||||
advisoryNotes.push(
|
||||
"Why-this-proves-it reasoning was weak, but treated as advisory because the packet already contained direct success evidence.",
|
||||
);
|
||||
}
|
||||
if (failedKeys.includes("verification_hints_actionable")) {
|
||||
advisoryNotes.push(
|
||||
"Verification hints were weak, but treated as advisory because the verbatim evidence already covered the done criterion.",
|
||||
);
|
||||
}
|
||||
|
||||
return {
|
||||
...review,
|
||||
accepted: true,
|
||||
evidence_convincing: true,
|
||||
observations: [...review.observations, ...advisoryNotes],
|
||||
concerns: review.concerns,
|
||||
suggestions: review.suggestions,
|
||||
missing_evidence: review.missing_evidence.filter(
|
||||
(item) =>
|
||||
!advisoryKeys.includes(item) &&
|
||||
!/verification hint/i.test(item) &&
|
||||
!/failure[- ]?mode/i.test(item) &&
|
||||
!/distinguish/i.test(item),
|
||||
),
|
||||
};
|
||||
}
|
||||
|
||||
export function appendRobotReviewMetadata(
|
||||
task: Task,
|
||||
review: Omit<RobotReviewRecord, "iteration">,
|
||||
): Record<string, unknown> {
|
||||
const robot_reviews = [
|
||||
...getRobotReviews(task),
|
||||
{ ...review, iteration: 0 },
|
||||
].map((entry, index) => ({
|
||||
...entry,
|
||||
accepted: entry.accepted,
|
||||
iteration: index + 1,
|
||||
}));
|
||||
const latest = robot_reviews[robot_reviews.length - 1];
|
||||
return {
|
||||
robot_reviews,
|
||||
robot_review_reviewer: latest.reviewer,
|
||||
robot_review_scope: latest.scope,
|
||||
robot_review_observations: latest.observations,
|
||||
robot_review_concerns: latest.concerns,
|
||||
robot_review_suggestions: latest.suggestions,
|
||||
robot_review_blind_spots: latest.blind_spots,
|
||||
robot_review_accepted: latest.accepted,
|
||||
robot_review_evidence_complete: latest.evidence_complete,
|
||||
robot_review_evidence_convincing: latest.evidence_convincing,
|
||||
robot_review_missing_evidence: latest.missing_evidence,
|
||||
robot_review_submitted_at: latest.submitted_at,
|
||||
robot_review_mode: latest.mode,
|
||||
robot_review_reason: latest.reason ?? null,
|
||||
robot_review_raw_output: latest.raw_output ?? null,
|
||||
robot_review_requires_followup: !(
|
||||
latest.evidence_complete && latest.evidence_convincing
|
||||
),
|
||||
robot_review_iteration_count: robot_reviews.length,
|
||||
};
|
||||
}
|
||||
|
||||
export function latestRobotReviewPasses(task: Task): boolean {
|
||||
const latest = getLatestRobotReview(task);
|
||||
return latest ? latest.accepted : false;
|
||||
}
|
||||
+14
-15
@@ -126,11 +126,12 @@ export class TaskStore {
|
||||
|
||||
create(
|
||||
subject: string,
|
||||
description: string,
|
||||
done_criterion: string,
|
||||
done_criterion?: string,
|
||||
failure_mode?: string,
|
||||
progress_label?: string,
|
||||
metadata?: Record<string, any>,
|
||||
parentId?: string,
|
||||
description?: string,
|
||||
): Task {
|
||||
return this.withLock(() => {
|
||||
if (parentId && !this.tasks.has(parentId))
|
||||
@@ -140,7 +141,8 @@ export class TaskStore {
|
||||
id: String(this.nextId++),
|
||||
subject,
|
||||
description,
|
||||
done_criterion,
|
||||
done_criterion: done_criterion || undefined,
|
||||
failure_mode,
|
||||
parentId,
|
||||
status: "pending",
|
||||
progress_label,
|
||||
@@ -174,6 +176,7 @@ export class TaskStore {
|
||||
subject?: string;
|
||||
description?: string;
|
||||
done_criterion?: string;
|
||||
failure_mode?: string;
|
||||
progress_label?: string;
|
||||
metadata?: Record<string, any>;
|
||||
parentId?: string | null;
|
||||
@@ -188,14 +191,6 @@ export class TaskStore {
|
||||
const changedFields: string[] = [];
|
||||
const warnings: string[] = [];
|
||||
|
||||
// Subtasks are normal checklist items. Top-level tasks are goals and need a proof
|
||||
// claim plus automatic review; TaskClaimDone is the only agent path that completes them.
|
||||
if (fields.status === "completed" && !task.parentId) {
|
||||
throw new Error(
|
||||
`Top-level task #${id} requires proof. Use TaskClaimDone with evidence and failure modes; subtasks can be completed directly.`,
|
||||
);
|
||||
}
|
||||
|
||||
if (fields.status === "deleted") {
|
||||
this.tasks.delete(id);
|
||||
for (const t of this.tasks.values()) {
|
||||
@@ -213,14 +208,18 @@ export class TaskStore {
|
||||
task.subject = fields.subject;
|
||||
changedFields.push("subject");
|
||||
}
|
||||
if (fields.description !== undefined) {
|
||||
task.description = fields.description;
|
||||
changedFields.push("description");
|
||||
}
|
||||
if (fields.done_criterion !== undefined) {
|
||||
task.done_criterion = fields.done_criterion;
|
||||
changedFields.push("done_criterion");
|
||||
}
|
||||
if (fields.failure_mode !== undefined) {
|
||||
task.failure_mode = fields.failure_mode;
|
||||
changedFields.push("failure_mode");
|
||||
}
|
||||
if (fields.description !== undefined) {
|
||||
task.description = fields.description;
|
||||
changedFields.push("description");
|
||||
}
|
||||
if (fields.progress_label !== undefined) {
|
||||
task.progress_label = fields.progress_label;
|
||||
changedFields.push("progress_label");
|
||||
|
||||
+4
-8
@@ -1,12 +1,13 @@
|
||||
// <cwd>/.pi/tasks-config.json — persists extension settings across sessions
|
||||
|
||||
import { mkdirSync, readFileSync, writeFileSync } from "node:fs";
|
||||
import { dirname, join } from "node:path";
|
||||
import { readFileSync } from "node:fs";
|
||||
import { join } from "node:path";
|
||||
|
||||
/**
 * Extension settings read from `<cwd>/.pi/tasks-config.json`.
 *
 * Every field is optional; the defaults noted on each field are the ones
 * stated by the original inline comments.
 */
export interface TasksConfig {
  /** default: "session" */
  taskScope?: "memory" | "session" | "project";
  /** default: false */
  autoCascade?: boolean;
  /** default: "never" */
  autoClearCompleted?: "never" | "on_list_complete" | "on_task_complete";
  /** Turns without task tool use before a reminder. default: 4 */
  reminderInterval?: number;
  /** How many turns completed tasks linger. default: 4 */
  clearDelayTurns?: number;
}
|
||||
|
||||
const CONFIG_PATH = join(process.cwd(), ".pi", "tasks-config.json");
|
||||
@@ -18,8 +19,3 @@ export function loadTasksConfig(): TasksConfig {
|
||||
return {};
|
||||
}
|
||||
}
|
||||
|
||||
export function saveTasksConfig(config: TasksConfig): void {
|
||||
mkdirSync(dirname(CONFIG_PATH), { recursive: true });
|
||||
writeFileSync(CONFIG_PATH, JSON.stringify(config, null, 2));
|
||||
}
|
||||
|
||||
+9
-3
@@ -1,5 +1,10 @@
|
||||
/**
|
||||
* types.ts — Type definitions for the task management system.
|
||||
*
|
||||
* Three kinds of items, all stored as Task:
|
||||
* - Goal: has done_criterion + failure_mode. Completes via TaskComplete with evidence.
|
||||
* - Subtask: has parentId. Just subject. Completes via TaskUpdate.
|
||||
* - Task: no parentId, no done_criterion. Plain checklist item. Completes via TaskUpdate.
|
||||
*/
|
||||
|
||||
export type TaskStatus = "pending" | "in_progress" | "completed";
|
||||
@@ -7,9 +12,10 @@ export type TaskStatus = "pending" | "in_progress" | "completed";
|
||||
export interface Task {
|
||||
id: string;
|
||||
subject: string;
|
||||
description: string;
|
||||
done_criterion: string; // required: what "done" looks like
|
||||
parentId?: string; // no parent = top-level goal, requires proof claim to complete
|
||||
description?: string;
|
||||
done_criterion?: string;
|
||||
failure_mode?: string;
|
||||
parentId?: string;
|
||||
status: TaskStatus;
|
||||
progress_label?: string;
|
||||
metadata: Record<string, any>;
|
||||
|
||||
+11
-2
@@ -8,10 +8,19 @@
|
||||
* Completed tasks stay in storage but are hidden from the collapsed widget.
|
||||
*/
|
||||
|
||||
import { truncateToWidth } from "@mariozechner/pi-tui";
|
||||
import { getDisplayStatus } from "../review-badges.js";
|
||||
import type { Task } from "../types.js";
|
||||
import type { TaskStore } from "../task-store.js";
|
||||
|
||||
/**
 * Simple truncation fallback.
 *
 * Returns `line` unchanged when it already fits. Otherwise cuts it so that
 * the result, including a trailing "…", is at most `maxWidth` UTF-16 code
 * units long. Width is measured with `String.length`, not display columns.
 *
 * @param line - Text to fit.
 * @param maxWidth - Maximum allowed length. A non-positive or NaN width
 *   leaves no room even for the ellipsis, so an empty string is returned.
 * @returns The original or truncated text.
 */
function truncateToWidth(line: string, maxWidth: number): string {
  if (line.length <= maxWidth) return line;
  // Without this guard, slice(0, maxWidth - 1) gets a negative end index and
  // keeps almost the whole line, producing output wider than requested.
  if (!(maxWidth > 0)) return "";
  return line.slice(0, maxWidth - 1) + "…";
}
|
||||
|
||||
/**
 * Status shown for a task in the widget. Currently this is the task's
 * stored `status`, returned unchanged.
 *
 * @param task - The task to display.
 * @returns `task.status`.
 */
function getDisplayStatus(task: Task): "in_progress" | "pending" | "completed" {
  return task.status;
}
|
||||
|
||||
// ---- Types ----
|
||||
|
||||
export type Theme = {
|
||||
|
||||
+15
-104
@@ -8,24 +8,22 @@ type RegisteredTool = {
|
||||
|
||||
type RegisteredCommand = {
|
||||
handler: (args: string, ctx: any) => Promise<void>;
|
||||
getArgumentCompletions?: (args: string) => Promise<string[]>;
|
||||
};
|
||||
|
||||
function makeHarness() {
|
||||
const tools = new Map<string, RegisteredTool>();
|
||||
const commands = new Map<string, RegisteredCommand>();
|
||||
const sentMessages: any[] = [];
|
||||
|
||||
const pi = {
|
||||
on: vi.fn(),
|
||||
events: { on: vi.fn(() => vi.fn()), emit: vi.fn() },
|
||||
registerTool: vi.fn((tool: RegisteredTool) => tools.set(tool.name, tool)),
|
||||
registerCommand: vi.fn((name: string, command: RegisteredCommand) =>
|
||||
commands.set(name, command),
|
||||
),
|
||||
sendMessage: vi.fn((message: any) => sentMessages.push(message)),
|
||||
};
|
||||
|
||||
proofTasksExtension(pi as any);
|
||||
proofTasksExtension(pi as any, { ui: undefined } as any);
|
||||
|
||||
async function execTool(name: string, params: Record<string, unknown>) {
|
||||
const tool = tools.get(name);
|
||||
@@ -33,22 +31,16 @@ function makeHarness() {
|
||||
return tool.execute("tool-call", params, undefined, undefined, {});
|
||||
}
|
||||
|
||||
function makeUi(
|
||||
overrides: {
|
||||
select?: Array<string | undefined>;
|
||||
confirm?: Array<boolean>;
|
||||
} = {},
|
||||
) {
|
||||
function makeUi(overrides: { select?: Array<string | undefined> } = {}) {
|
||||
const selectQueue = [...(overrides.select ?? [])];
|
||||
const confirmQueue = [...(overrides.confirm ?? [])];
|
||||
return {
|
||||
notify: vi.fn(),
|
||||
select: vi.fn(async () => selectQueue.shift()),
|
||||
confirm: vi.fn(async () => confirmQueue.shift() ?? false),
|
||||
input: vi.fn(async () => ""),
|
||||
};
|
||||
}
|
||||
|
||||
return { tools, commands, sentMessages, execTool, makeUi };
|
||||
return { tools, commands, execTool, makeUi };
|
||||
}
|
||||
|
||||
describe("parseLgtmArgs", () => {
|
||||
@@ -58,109 +50,28 @@ describe("parseLgtmArgs", () => {
|
||||
expect(parseLgtmArgs("1 #2")).toEqual({ kind: "view", ids: ["1", "2"] });
|
||||
});
|
||||
|
||||
it("rejects task-management forms", () => {
|
||||
expect(parseLgtmArgs("clear")).toEqual({
|
||||
kind: "error",
|
||||
message: "Task management lives in /tasks now. /lgtm is viewer-only.",
|
||||
});
|
||||
expect(parseLgtmArgs("clear *")).toEqual({
|
||||
kind: "error",
|
||||
message: "Task management lives in /tasks now. /lgtm is viewer-only.",
|
||||
});
|
||||
expect(parseLgtmArgs("clear #7")).toEqual({
|
||||
kind: "error",
|
||||
message: "Task management lives in /tasks now. /lgtm is viewer-only.",
|
||||
});
|
||||
expect(parseLgtmArgs("delete #7")).toEqual({
|
||||
kind: "error",
|
||||
message: "Task management lives in /tasks now. /lgtm is viewer-only.",
|
||||
});
|
||||
it("treats unknown args as view IDs", () => {
|
||||
// "clear" and "delete" are just treated as task IDs now
|
||||
expect(parseLgtmArgs("clear")).toEqual({ kind: "view", ids: ["clear"] });
|
||||
expect(parseLgtmArgs("1 2")).toEqual({ kind: "view", ids: ["1", "2"] });
|
||||
});
|
||||
});
|
||||
|
||||
describe("/lgtm command", () => {
|
||||
it("shows all open proof logs from the picker", async () => {
|
||||
it("shows proof logs from picker", async () => {
|
||||
const harness = makeHarness();
|
||||
await harness.execTool("TaskCreate", {
|
||||
subject: "Task A",
|
||||
description: "Desc",
|
||||
done_criterion: "done",
|
||||
});
|
||||
await harness.execTool("TaskCreate", {
|
||||
subject: "Task B",
|
||||
description: "Desc",
|
||||
done_criterion: "done",
|
||||
subject: "Goal A",
|
||||
done_criterion: "test passes",
|
||||
});
|
||||
|
||||
const ui = harness.makeUi({ select: ["View all open proof logs"] });
|
||||
const ui = harness.makeUi({ select: ["○★ #1 Goal A", "← Back"] });
|
||||
const command = harness.commands.get("lgtm");
|
||||
if (!command) throw new Error("/lgtm not registered");
|
||||
|
||||
await command.handler("", { ui });
|
||||
|
||||
expect(harness.sentMessages).toHaveLength(2);
|
||||
expect(harness.sentMessages[0].customType).toBe("proof-log");
|
||||
expect(harness.sentMessages[0].content).toContain("Task #1");
|
||||
expect(harness.sentMessages[1].content).toContain("Task #2");
|
||||
});
|
||||
|
||||
it("shows one proof log from the picker", async () => {
|
||||
const harness = makeHarness();
|
||||
await harness.execTool("TaskCreate", {
|
||||
subject: "Task A",
|
||||
description: "Desc",
|
||||
done_criterion: "done",
|
||||
});
|
||||
|
||||
const ui = harness.makeUi({ select: ["[PENDING] #1 Task A"] });
|
||||
const command = harness.commands.get("lgtm");
|
||||
if (!command) throw new Error("/lgtm not registered");
|
||||
|
||||
await command.handler("", { ui });
|
||||
|
||||
expect(harness.sentMessages).toHaveLength(1);
|
||||
expect(harness.sentMessages[0].content).toContain("Task #1");
|
||||
});
|
||||
|
||||
it("rejects /lgtm clear and points task management back to /tasks", async () => {
|
||||
const harness = makeHarness();
|
||||
await harness.execTool("TaskCreate", {
|
||||
subject: "Task A",
|
||||
description: "Desc",
|
||||
done_criterion: "done",
|
||||
});
|
||||
|
||||
const ui = harness.makeUi();
|
||||
const command = harness.commands.get("lgtm");
|
||||
if (!command) throw new Error("/lgtm not registered");
|
||||
|
||||
await command.handler("clear 1", { ui });
|
||||
|
||||
expect(harness.sentMessages).toHaveLength(0);
|
||||
expect(ui.notify).toHaveBeenCalledWith(
|
||||
"Task management lives in /tasks now. /lgtm is viewer-only.",
|
||||
"error",
|
||||
);
|
||||
});
|
||||
|
||||
it("rejects /lgtm delete and points task management back to /tasks", async () => {
|
||||
const harness = makeHarness();
|
||||
await harness.execTool("TaskCreate", {
|
||||
subject: "Task A",
|
||||
description: "Desc",
|
||||
done_criterion: "done",
|
||||
});
|
||||
|
||||
const ui = harness.makeUi();
|
||||
const command = harness.commands.get("lgtm");
|
||||
if (!command) throw new Error("/lgtm not registered");
|
||||
|
||||
await command.handler("delete 1", { ui });
|
||||
|
||||
expect(harness.sentMessages).toHaveLength(0);
|
||||
expect(ui.notify).toHaveBeenCalledWith(
|
||||
"Task management lives in /tasks now. /lgtm is viewer-only.",
|
||||
"error",
|
||||
);
|
||||
// Should have shown the task in the select options
|
||||
expect(ui.select).toHaveBeenCalled();
|
||||
});
|
||||
});
|
||||
|
||||
@@ -1,145 +0,0 @@
|
||||
import { describe, expect, it } from "vitest";
|
||||
import {
|
||||
getCompletionMode,
|
||||
getDisplayStatus,
|
||||
getGateStatus,
|
||||
getReviewState,
|
||||
} from "../src/review-badges.js";
|
||||
import type { Task } from "../src/types.js";
|
||||
|
||||
/**
 * Builds a Task fixture for the tests in this file.
 *
 * @param overrides - Fields that replace the defaults below; later keys win.
 * @returns A pending task with id "1" unless overridden.
 */
function makeTask(overrides: Partial<Task> = {}): Task {
  const defaults: Task = {
    id: "1",
    subject: "Test",
    description: "Desc",
    done_criterion: "done",
    status: "pending",
    progress_label: undefined,
    metadata: {},
    blocks: [],
    blockedBy: [],
    createdAt: 0,
    updatedAt: 0,
  };
  return { ...defaults, ...overrides };
}
|
||||
|
||||
// Covers getCompletionMode and getReviewState against minimal task fixtures.
describe("review state helpers", () => {
  it("reports completion mode as proof for top-level tasks", () => {
    const topLevel = makeTask();
    expect(getCompletionMode(topLevel)).toBe("proof");
  });

  it("reports completion mode as direct for subtasks", () => {
    const subtask = makeTask({ parentId: "1" });
    expect(getCompletionMode(subtask)).toBe("direct");
  });

  it("reports superseded when only history remains", () => {
    // No current evidence fields are set; only an archived iteration exists.
    const historyOnly = makeTask({
      metadata: { lgtm_history: [{ iteration: 1 }] },
    });
    expect(getReviewState(historyOnly)).toBe("superseded");
  });
});
|
||||
|
||||
// Covers the human-readable gate status string for a task in each review state.
describe("getGateStatus", () => {
  const REJECTED_STATUS =
    "latest proof review rejected the evidence; strengthen the proof and try again";

  // Returns a fresh copy of the rejected manual review used by both rejection tests.
  const rejectedReview = () => ({
    iteration: 1,
    reviewer: "opencode",
    scope: "task evidence",
    observations: ["Observed missing output"],
    concerns: ["The current evidence is summary-only."],
    suggestions: ["Paste the literal output."],
    blind_spots: "none",
    accepted: false,
    evidence_complete: false,
    evidence_convincing: false,
    missing_evidence: ["literal output"],
    submitted_at: "2026-04-17T00:00:00.000Z",
    mode: "manual" as const,
  });

  it("reports top-level proof requirement before evidence", () => {
    const fresh = makeTask();
    expect(getGateStatus(fresh)).toBe(
      "top-level task requires TaskClaimDone evidence before completion",
    );
  });

  it("reports non-blocking reviewer failure", () => {
    const reviewerBroke = makeTask({
      metadata: {
        lgtm_evidence: "ok",
        robot_review_last_error: "Unexpected token 'a'",
      },
    });
    expect(getGateStatus(reviewerBroke)).toContain(
      "review unavailable; autonomy continues",
    );
  });

  it("reports rejected robot review when latest review does not accept", () => {
    const rejected = makeTask({
      metadata: {
        lgtm_evidence: "ok",
        robot_reviews: [rejectedReview()],
      },
    });
    expect(getGateStatus(rejected)).toBe(REJECTED_STATUS);
  });

  it("keeps rejection higher priority than a later reviewer warning", () => {
    // Both a reviewer error and a rejected review are present; rejection must win.
    const rejectedThenErrored = makeTask({
      metadata: {
        lgtm_evidence: "ok",
        robot_review_last_error: "timeout",
        robot_reviews: [rejectedReview()],
      },
    });
    expect(getGateStatus(rejectedThenErrored)).toBe(REJECTED_STATUS);
  });
});
|
||||
|
||||
// The display status is expected to mirror the stored status for these plain tasks.
describe("getDisplayStatus", () => {
  it("returns pending for fresh tasks", () => {
    const fresh = makeTask();
    expect(getDisplayStatus(fresh)).toBe("pending");
  });

  it("returns in_progress for active tasks not yet escalated", () => {
    const active = makeTask({ status: "in_progress" });
    expect(getDisplayStatus(active)).toBe("in_progress");
  });

  it("returns completed for completed tasks", () => {
    const finished = makeTask({ status: "completed" });
    expect(getDisplayStatus(finished)).toBe("completed");
  });
});
|
||||
@@ -1,115 +0,0 @@
|
||||
import { describe, expect, it } from "vitest";
|
||||
import {
|
||||
DEFAULT_ROBOT_REVIEW_TIMEOUT_MS,
|
||||
extractFinalAssistantTextFromPiJsonl,
|
||||
extractRobotReviewJson,
|
||||
getCurrentModelRef,
|
||||
getPiInvocation,
|
||||
getRobotReviewTimeoutMs,
|
||||
runRobotReviewCommand,
|
||||
} from "../src/index.js";
|
||||
|
||||
// Unit tests for the helpers that build the pi invocation, parse reviewer
// output, resolve the timeout and model ref, and run the reviewer child process.
describe("robot review runner helpers", () => {
  it("uses plain pi by default and allows override", () => {
    const bareEnv = {} as NodeJS.ProcessEnv;
    const overriddenEnv = {
      PI_PROOF_TASKS_PI_BIN: "/custom/pi",
    } as NodeJS.ProcessEnv;

    const defaultInvocation = getPiInvocation(["--mode", "json"], bareEnv);
    expect(defaultInvocation).toEqual({
      command: "pi",
      args: ["--mode", "json"],
    });

    const customInvocation = getPiInvocation(["-p"], overriddenEnv);
    expect(customInvocation).toEqual({
      command: "/custom/pi",
      args: ["-p"],
    });
  });

  it("parses the final assistant text from pi jsonl", () => {
    const jsonlLines = [
      '{"type":"message_update"}',
      '{"type":"message_end","message":{"role":"assistant","content":[{"type":"text","text":"ROBOT_REVIEW_JSON_START {\\"accepted\\":true} ROBOT_REVIEW_JSON_END"}]}}',
    ];
    const finalText = extractFinalAssistantTextFromPiJsonl(
      jsonlLines.join("\n"),
    );
    expect(finalText).toContain("ROBOT_REVIEW_JSON_START");
  });

  it("parses noisy JSON wrapped in review markers", () => {
    // Chatter and a fenced code block sit between the markers around the JSON.
    const noisyLines = [
      "ROBOT_REVIEW_JSON_START",
      "and here is the JSON you asked for:",
      "```json",
      '{"accepted":true,"observations":["ok"]}',
      "```",
      "ROBOT_REVIEW_JSON_END",
    ];
    const parsed = extractRobotReviewJson(noisyLines.join("\n"));
    expect(parsed).toEqual({
      accepted: true,
      observations: ["ok"],
    });
  });

  it("includes raw output context on parse failure", () => {
    const malformed = "ROBOT_REVIEW_JSON_START and nope ROBOT_REVIEW_JSON_END";
    expect(() => extractRobotReviewJson(malformed)).toThrow(/Raw output:/);
  });

  it("uses configured timeout or falls back to default", () => {
    const configuredEnv = {
      PI_PROOF_TASKS_ROBOT_REVIEW_TIMEOUT_MS: "2500",
    } as NodeJS.ProcessEnv;
    const unparsableEnv = {
      PI_PROOF_TASKS_ROBOT_REVIEW_TIMEOUT_MS: "bad",
    } as NodeJS.ProcessEnv;

    expect(getRobotReviewTimeoutMs(configuredEnv)).toBe(2500);
    expect(getRobotReviewTimeoutMs(unparsableEnv)).toBe(
      DEFAULT_ROBOT_REVIEW_TIMEOUT_MS,
    );
  });

  it("formats the current model as the reviewer model ref", () => {
    const fromProviderAndId = getCurrentModelRef({
      provider: "openai",
      id: "gpt-5",
    });
    expect(fromProviderAndId).toBe("openai/gpt-5");

    const fromAlternateKeys = getCurrentModelRef({
      providerId: "anthropic",
      modelId: "claude-haiku",
    });
    expect(fromAlternateKeys).toBe("anthropic/claude-haiku");

    // A provider without a model id cannot form a ref.
    expect(getCurrentModelRef({ provider: "openai" })).toBeUndefined();
  });

  it("times out bounded child commands", async () => {
    // The child stays alive for 1000 ms while the runner is given only 25 ms.
    const sleeper = {
      command: process.execPath,
      args: ["-e", "setTimeout(() => {}, 1000)"],
    };
    const pending = runRobotReviewCommand(sleeper, undefined, 25);
    await expect(pending).rejects.toThrow(/timed out/i);
  });

  it("extracts assistant text from a child jsonl process", async () => {
    const scriptParts = [
      "process.stdout.write(JSON.stringify({type:'message_update'}) + '\\n');",
      "process.stdout.write(JSON.stringify({type:'message_end',message:{role:'assistant',content:[{type:'text',text:'ROBOT_REVIEW_JSON_START {\\\"accepted\\\":true,\\\"observations\\\":[\\\"ok\\\"]} ROBOT_REVIEW_JSON_END'}]}}) + '\\n');",
    ];
    const emitter = {
      command: process.execPath,
      args: ["-e", scriptParts.join("")],
    };

    const outcome = await runRobotReviewCommand(emitter, undefined, 500);

    expect(outcome.exitCode).toBe(0);
    expect(outcome.stdout).toContain("ROBOT_REVIEW_JSON_END");
  });
});
|
||||
@@ -1,442 +0,0 @@
|
||||
import { mkdtempSync, writeFileSync } from "node:fs";
|
||||
import { tmpdir } from "node:os";
|
||||
import { join } from "node:path";
|
||||
import { describe, expect, it } from "vitest";
|
||||
import {
|
||||
archiveCurrentEvidence,
|
||||
buildArtifactRecords,
|
||||
buildRobotReviewPrompt,
|
||||
getCurrentEvidenceIteration,
|
||||
getEvidenceHistory,
|
||||
renderEvidencePacket,
|
||||
renderProofLog,
|
||||
} from "../src/index.js";
|
||||
import {
|
||||
appendRobotReviewMetadata,
|
||||
getLatestRobotReview,
|
||||
getRobotReviews,
|
||||
hasCompleteProofClaim,
|
||||
relaxAdvisoryVerificationHints,
|
||||
shouldCompleteAfterAcceptedReview,
|
||||
} from "../src/robot-review.js";
|
||||
import type { Task } from "../src/types.js";
|
||||
|
||||
/**
 * Builds a Task fixture for the robot-review tests.
 *
 * @param overrides - Fields that replace the defaults below; later keys win.
 * @returns A pending task with id "1" and empty metadata unless overridden.
 */
function makeTask(overrides: Partial<Task> = {}): Task {
  const defaults: Task = {
    id: "1",
    subject: "Test",
    description: "Desc",
    done_criterion: "done",
    status: "pending",
    progress_label: undefined,
    metadata: {},
    blocks: [],
    blockedBy: [],
    createdAt: 0,
    updatedAt: 0,
  };
  return { ...defaults, ...overrides };
}
|
||||
|
||||
describe("robot review helpers", () => {
|
||||
// Completion requires BOTH an accepted review and a fully populated proof claim.
it("completes only after accepted review and complete proof claim", () => {
  const fullClaim = makeTask({
    metadata: {
      lgtm_evidence: "literal output",
      lgtm_failure_likely: "wrong command",
      lgtm_failure_sneaky: "right output for wrong reason",
      lgtm_failure_unknown: "untested platform",
      lgtm_falsification_test: "npm test\npass",
      lgtm_evidence_reasoning:
        "the test output rules out the named failures for this scope",
      lgtm_verification_hints: [
        "test/robot-review.test.ts shows the expectation",
      ],
      lgtm_remaining_uncertainty: "does not test prod install",
    },
  });
  // Evidence alone, without the remaining claim fields, is an incomplete claim.
  const evidenceOnly = makeTask({
    metadata: { lgtm_evidence: "literal output" },
  });

  expect(hasCompleteProofClaim(fullClaim)).toBe(true);
  expect(shouldCompleteAfterAcceptedReview(fullClaim, true)).toBe(true);
  expect(shouldCompleteAfterAcceptedReview(fullClaim, false)).toBe(false);
  expect(shouldCompleteAfterAcceptedReview(evidenceOnly, true)).toBe(false);
});
|
||||
|
||||
// Flat `robot_review_*` keys (no `robot_reviews` array) must still surface as
// one review, which the test expects to be iteration 1 and accepted.
it("reads legacy single-review metadata", () => {
  const legacyMetadata = {
    robot_review_reviewer: "opencode",
    robot_review_scope: "task evidence",
    robot_review_observations: [
      "Observed no command output for the core claim",
    ],
    robot_review_blind_spots: "Did not rerun tests",
    robot_review_submitted_at: "2026-04-17T00:00:00.000Z",
  };
  const legacyTask = makeTask({ metadata: legacyMetadata });

  const reviews = getRobotReviews(legacyTask);

  expect(reviews).toHaveLength(1);
  const only = reviews[0];
  expect(only.reviewer).toBe("opencode");
  expect(only.iteration).toBe(1);
  expect(only.accepted).toBe(true);
});
|
||||
|
||||
// Writes a 6-byte file into a fresh temp directory and checks the record built for it.
it("builds artifact records with absolute path and sha256", () => {
  const scratchDir = mkdtempSync(join(tmpdir(), "pi-proof-tasks-"));
  const logPath = join(scratchDir, "evidence.log");
  writeFileSync(logPath, "hello\n");

  const records = buildArtifactRecords([logPath]);
  const [record] = records;

  expect(record.path).toBe(logPath);
  expect(record.bytes).toBe(6);
  // A hex-encoded SHA-256 digest is 64 characters long.
  expect(record.sha256).toHaveLength(64);
});
|
||||
|
||||
// Archiving returns metadata whose history holds the old evidence plus the supersede reason.
it("archives current evidence with reason", () => {
  const original = makeTask({
    metadata: {
      lgtm_evidence: "literal output",
      lgtm_failure_likely: "wrong seed",
      lgtm_failure_sneaky: "wrong threshold",
      lgtm_failure_unknown: "untested environment",
      lgtm_falsification_test: "pytest -k check",
      lgtm_evidence_reasoning:
        "pytest output distinguishes the expected passing path from the named failures",
      lgtm_verification_hints: ["see line 5"],
      lgtm_remaining_uncertainty: "not load tested",
      lgtm_submitted_at: "2026-06-07T00:00:00.000Z",
      lgtm_commands: [{ cmd: "pytest", exit_code: 0 }],
    },
  });

  const archivedMetadata = archiveCurrentEvidence(original, "threshold changed");
  const archivedTask = makeTask({ metadata: archivedMetadata });

  // The un-archived task still reports its evidence as iteration 1.
  expect(getCurrentEvidenceIteration(original)?.iteration).toBe(1);
  expect(getEvidenceHistory(archivedTask)).toHaveLength(1);
  expect(getEvidenceHistory(archivedTask)[0].supersede_reason).toBe(
    "threshold changed",
  );
});
|
||||
|
||||
// When the first three rubric items pass, failures on the last two should be
// relaxed: the review flips to accepted and the missing-evidence list is cleared.
it("treats advisory rubric failures as non-blocking when core evidence already passes", () => {
  const rubric = {
    evidence_covers_done_criterion: {
      reason: "verbatim logs match",
      pass: true,
    },
    falsification_test_runnable: {
      reason: "command and output shown",
      pass: true,
    },
    failure_modes_addressed: {
      reason: "plausible top risks named",
      pass: true,
    },
    evidence_distinguishes_success: {
      reason: "reasoning writeup is thin",
      pass: false,
    },
    verification_hints_actionable: {
      reason: "paths are vague",
      pass: false,
    },
  };

  const relaxed = relaxAdvisoryVerificationHints({
    reviewer: "auto",
    scope: "task evidence",
    observations: ["Observed commit, push, and test logs"],
    concerns: [],
    suggestions: [],
    blind_spots: "Did not inspect interactive UI",
    accepted: false,
    evidence_complete: true,
    evidence_convincing: false,
    missing_evidence: [
      "verification_hints_actionable",
      "evidence_distinguishes_success",
    ],
    submitted_at: "2026-06-13T00:00:00.000Z",
    mode: "auto",
    rubric,
  });

  expect(relaxed.accepted).toBe(true);
  expect(relaxed.evidence_convincing).toBe(true);
  const mentionsAdvisory = relaxed.observations.some((note) =>
    note.includes("treated as advisory"),
  );
  expect(mentionsAdvisory).toBe(true);
  expect(relaxed.missing_evidence).toEqual([]);
});
|
||||
|
||||
// Counterpart to the relaxation test: with `evidence_covers_done_criterion`
// failing, the review must stay rejected.
it("does not relax verification hints unless the core rubric passes", () => {
  const rubric = {
    evidence_covers_done_criterion: { reason: "summary only", pass: false },
    falsification_test_runnable: {
      reason: "command and output shown",
      pass: true,
    },
    failure_modes_addressed: {
      reason: "plausible top risks named",
      pass: true,
    },
    evidence_distinguishes_success: {
      reason: "evidence does not rule out summary-only failure",
      pass: false,
    },
    verification_hints_actionable: {
      reason: "paths are vague",
      pass: false,
    },
  };

  const unchanged = relaxAdvisoryVerificationHints({
    reviewer: "auto",
    scope: "task evidence",
    observations: ["Observed vague summary only"],
    concerns: [],
    suggestions: [],
    blind_spots: "Did not rerun tests",
    accepted: false,
    evidence_complete: true,
    evidence_convincing: false,
    missing_evidence: ["verification_hints_actionable"],
    submitted_at: "2026-06-13T00:00:00.000Z",
    mode: "auto",
    rubric,
  });

  expect(unchanged.accepted).toBe(false);
  expect(unchanged.evidence_convincing).toBe(false);
});
|
||||
|
||||
// The rendered evidence packet must carry the expected headings, and the robot
// review prompt must embed that same packet verbatim plus its key instructions.
it("renders one compact evidence packet for both human and robot review", () => {
  const task = makeTask({
    metadata: {
      lgtm_evidence: "literal output",
      lgtm_failure_likely: "wrong seed",
      lgtm_failure_sneaky: "wrong threshold",
      lgtm_failure_unknown: "does not test UI rendering",
      lgtm_falsification_test: "pytest -k check\nPASSED",
      lgtm_evidence_reasoning:
        "The passing pytest transcript distinguishes success from wrong-threshold and wrong-seed failures for this test scope.",
      lgtm_verification_hints: [
        "test/robot-review.test.ts contains the new guard test",
      ],
      lgtm_remaining_uncertainty: "not load tested",
      lgtm_submitted_at: "2026-06-14T00:00:00.000Z",
      lgtm_commands: [
        { cmd: "npm test", exit_code: 0, stdout_path: "/tmp/test.log" },
      ],
      lgtm_evidence_artifacts: [
        { path: "/tmp/test.log", sha256: "abc", bytes: 123 },
      ],
    },
  });

  const packet = renderEvidencePacket(task);
  const prompt = buildRobotReviewPrompt(task);

  const packetHeadings = ["## Goal", "## Attempt 1", "### Evidence", "### Verify"];
  for (const heading of packetHeadings) {
    expect(packet).toContain(heading);
  }

  expect(prompt).toContain(packet);

  const promptInstructions = [
    "does this packet prove the exact user-visible success condition",
    "Do not reject solely because items 3, 4, or 5 are weak",
    "concrete missing artifacts or outputs that block acceptance",
  ];
  for (const instruction of promptInstructions) {
    expect(prompt).toContain(instruction);
  }
});
|
||||
|
||||
// 35 evidence lines should render as the first 8 and last 8, with an omission
// marker and a pointer to the artifact that holds the full text.
it("truncates long submitted evidence in the rendered proof log and points to the full artifact", () => {
  const evidenceLines: string[] = [];
  for (let n = 1; n <= 35; n += 1) {
    evidenceLines.push(`line ${n}`);
  }
  const longEvidence = evidenceLines.join("\n");

  const task = makeTask({
    metadata: {
      lgtm_evidence: longEvidence,
      lgtm_failure_likely: "wrong seed",
      lgtm_failure_sneaky: "wrong threshold",
      lgtm_failure_unknown: "untested environment",
      lgtm_falsification_test: "pytest -k check\nPASSED",
      lgtm_evidence_reasoning:
        "The transcript rules out the named failures for this scope.",
      lgtm_verification_hints: ["see /tmp/test.log"],
      lgtm_remaining_uncertainty: "not load tested",
      lgtm_submitted_at: "2026-06-14T00:00:00.000Z",
      lgtm_evidence_artifacts: [
        { path: "/tmp/test.log", sha256: "abc", bytes: 123 },
      ],
    },
  });

  const rendered = renderProofLog(task);

  expect(rendered).toContain("line 1");
  expect(rendered).toContain("line 8");
  expect(rendered).toContain("line 35");
  // "line 9" is the first line that falls inside the omitted middle section.
  expect(rendered).not.toContain("line 9");
  expect(rendered).toContain("[... 19 middle lines omitted ...]");
  expect(rendered).toContain(
    "[truncated at 16 lines from 35; showing first 8 and last 8; full text: /tmp/test.log]",
  );
});
|
||||
|
||||
// Appending twice (a rejection, then an acceptance) must yield two reviews
// numbered 1 and 2, with the latest review and the iteration count updated.
it("appends robot reviews as iterations", () => {
  const blank = makeTask();
  const afterFirst = appendRobotReviewMetadata(blank, {
    reviewer: "opencode",
    scope: "task evidence",
    observations: ["Observed missing benchmark output"],
    concerns: ["The current evidence does not show the claimed speedup."],
    suggestions: ["Add the benchmark transcript for the claimed speedup."],
    blind_spots: "Did not inspect prod config",
    accepted: false,
    evidence_complete: false,
    evidence_convincing: false,
    missing_evidence: ["Benchmark output for the claimed speedup"],
    submitted_at: "2026-04-17T00:00:00.000Z",
    mode: "auto",
  });

  const onceReviewed = makeTask({ metadata: afterFirst });
  const afterSecond = appendRobotReviewMetadata(onceReviewed, {
    reviewer: "opencode",
    scope: "updated task evidence",
    observations: ["Observed benchmark output and test transcript"],
    concerns: [],
    suggestions: [],
    blind_spots: "Did not inspect long-run stability",
    accepted: true,
    evidence_complete: true,
    evidence_convincing: true,
    missing_evidence: [],
    submitted_at: "2026-04-17T01:00:00.000Z",
    mode: "auto",
  });

  const twiceReviewed = makeTask({ metadata: afterSecond });
  const reviews = getRobotReviews(twiceReviewed);

  expect(reviews).toHaveLength(2);
  expect(reviews[0].iteration).toBe(1);
  expect(reviews[1].iteration).toBe(2);
  expect(getLatestRobotReview(twiceReviewed)?.evidence_convincing).toBe(true);
  expect(twiceReviewed.metadata.robot_review_iteration_count).toBe(2);
});
|
||||
|
||||
// A task with evidence plus one rejecting review must render every proof-log
// section, including the refusal judgement and the reviewer's suggestion.
it("renders a simple proof log with judgement and suggestions", () => {
  const evidenceOnly = makeTask({
    metadata: {
      lgtm_evidence: "npm test\n125 passed",
      lgtm_failure_likely: "old package name still in README",
      lgtm_failure_sneaky: "top-level direct completion still slips through",
      lgtm_failure_unknown: "fresh judge command fails in a real session",
      lgtm_falsification_test: "npm test\n125 passed",
      lgtm_evidence_reasoning:
        "The test transcript and grep distinguish the intended behavior from stale workflow regressions.",
      lgtm_verification_hints: [
        "README.md install block shows pi-proof-tasks",
      ],
      lgtm_remaining_uncertainty: "Did not exercise every model provider.",
      lgtm_submitted_at: "2026-06-14T00:00:00.000Z",
    },
  });

  const reviewMetadata = appendRobotReviewMetadata(evidenceOnly, {
    reviewer: "auto",
    scope: "proof log",
    observations: ["Observed the test transcript and renamed package."],
    concerns: ["The live Pi session path is still untested."],
    suggestions: ["Run one self-hosted TaskClaimDone UAT."],
    blind_spots: "Did not inspect external auth state",
    accepted: false,
    evidence_complete: true,
    evidence_convincing: false,
    missing_evidence: ["self-hosted TaskClaimDone UAT"],
    submitted_at: "2026-06-14T00:01:00.000Z",
    mode: "auto",
  });

  // Review keys are spread last so they win over the evidence-only metadata.
  const reviewed = makeTask({
    metadata: {
      ...evidenceOnly.metadata,
      ...reviewMetadata,
    },
  });

  const rendered = renderProofLog(reviewed);

  const expectedFragments = [
    "# Task #1: Test",
    "## Goal",
    "## Attempt 1",
    "### Evidence",
    "### Verify",
    "### Judgement",
    "Refused by auto",
    "Needs:",
    "Next:",
    "Run one self-hosted TaskClaimDone UAT.",
  ];
  for (const fragment of expectedFragments) {
    expect(rendered).toContain(fragment);
  }
});
|
||||
|
||||
// The human-facing proof log may truncate long evidence, but the packet built
// for the automatic reviewer (truncateEvidence: false) must contain all of it.
it("keeps full submitted evidence in the automatic review packet even when proof logs truncate it", () => {
  // Use a unique per-run directory instead of a fixed filename in the shared OS
  // temp dir, so concurrent or repeated runs never read or clobber each other's file.
  const artifactDir = mkdtempSync(join(tmpdir(), "pi-proof-packet-"));
  const artifactPath = join(artifactDir, "proof-packet-long-evidence.log");
  const longEvidence = Array.from(
    { length: 35 },
    (_, i) => `line ${i + 1}`,
  ).join("\n");
  writeFileSync(artifactPath, longEvidence);

  const task = makeTask({
    metadata: {
      lgtm_evidence: longEvidence,
      lgtm_failure_likely: "missing artifact",
      lgtm_failure_sneaky: "wrong slice shown",
      lgtm_failure_unknown: "untested provider path",
      lgtm_falsification_test: "npm test\npass",
      lgtm_evidence_reasoning:
        "The full evidence must stay visible to the judge even if humans see a shortened preview.",
      lgtm_verification_hints: [
        "Open the artifact if the inline preview truncates.",
      ],
      lgtm_remaining_uncertainty: "Did not inspect live TUI.",
      lgtm_evidence_artifacts: buildArtifactRecords([artifactPath]),
    },
  });

  const proofLog = renderProofLog(task);
  const reviewPacket = renderEvidencePacket(task, {
    truncateEvidence: false,
  });

  // Truncated view: head and tail are present, the middle is dropped.
  expect(proofLog).toContain("line 8");
  expect(proofLog).toContain("line 35");
  expect(proofLog).not.toContain("line 9");
  // Untruncated view: no truncation marker at all.
  expect(reviewPacket).toContain("line 35");
  expect(reviewPacket).not.toContain("[truncated at 16 lines");
});
|
||||
|
||||
// A completed task whose reviewer errored must render the fail-open note and
// the error text, and must not show a "Needs:" list.
it("renders reviewer-unavailable proof logs for fail-open completion notes", () => {
  const failOpenTask = makeTask({
    status: "completed",
    metadata: {
      lgtm_evidence: "npm test\n125 passed",
      lgtm_failure_likely: "old package name still in README",
      lgtm_failure_sneaky: "top-level direct completion still slips through",
      lgtm_failure_unknown: "fresh judge command fails in a real session",
      lgtm_falsification_test: "npm test\n125 passed",
      lgtm_evidence_reasoning:
        "The test transcript and grep distinguish the intended behavior from stale workflow regressions.",
      lgtm_verification_hints: [
        "README.md install block shows pi-proof-tasks",
      ],
      lgtm_remaining_uncertainty: "Did not exercise every model provider.",
      robot_review_last_error: "judge auth failed",
    },
  });

  const rendered = renderProofLog(failOpenTask);

  const expectedFragments = [
    "completed with reviewer unavailable",
    "### Judgement",
    "judge auth failed",
    "Autonomy continued without blocking completion.",
  ];
  for (const fragment of expectedFragments) {
    expect(rendered).toContain(fragment);
  }
  expect(rendered).not.toContain("Needs:");
});
|
||||
});
|
||||
@@ -1,187 +0,0 @@
|
||||
import { chmodSync, mkdtempSync, writeFileSync } from "node:fs";
|
||||
import { tmpdir } from "node:os";
|
||||
import { join } from "node:path";
|
||||
import { afterEach, describe, expect, it, vi } from "vitest";
|
||||
import proofTasksExtension from "../src/index.js";
|
||||
|
||||
/** Minimal shape of a tool as captured from the extension's `registerTool` call. */
interface RegisteredTool {
  /** Name the tool is registered under; used as the lookup key in the harness. */
  name: string;
  /** Tool entry point; the harness calls it positionally and awaits the result. */
  execute: (...args: any[]) => Promise<any>;
}
|
||||
|
||||
/**
 * Loads the extension against a mocked `pi` host and captures the tools it registers.
 *
 * @returns An object with `execTool`, which runs a registered tool by name.
 */
function makeHarness() {
  const registry = new Map<string, RegisteredTool>();
  const pi = {
    on: vi.fn(),
    registerTool: vi.fn((tool: RegisteredTool) =>
      registry.set(tool.name, tool),
    ),
    registerCommand: vi.fn(),
    sendMessage: vi.fn(),
  };

  proofTasksExtension(pi as any);

  // Runs a registered tool; rejects if no tool with that name was registered.
  const execTool = async (
    name: string,
    params: Record<string, unknown>,
    ctx: Record<string, unknown> = {},
  ) => {
    const registered = registry.get(name);
    if (!registered) {
      throw new Error(`Tool ${name} not registered`);
    }
    return registered.execute("tool-call", params, undefined, undefined, ctx);
  };

  return { execTool };
}
|
||||
|
||||
/**
 * Writes `source` as an executable Node script into a fresh temp directory.
 *
 * @param source - JavaScript placed after a `#!/usr/bin/env node` shebang line.
 * @returns Path of the created `reviewer.js` file (mode 0o755).
 */
function writeReviewerScript(source: string): string {
  const scriptDir = mkdtempSync(join(tmpdir(), "pi-proof-reviewer-"));
  const scriptPath = join(scriptDir, "reviewer.js");
  const contents = `#!/usr/bin/env node\n${source}\n`;
  writeFileSync(scriptPath, contents);
  chmodSync(scriptPath, 0o755);
  return scriptPath;
}
|
||||
|
||||
// Snapshot of PI_PROOF_TASKS_PI_BIN taken at module load; tests overwrite the
// variable to point at fake reviewer scripts.
const ORIGINAL_PI_BIN = process.env.PI_PROOF_TASKS_PI_BIN;

// Put the variable back exactly as it was (including "unset") after every test.
afterEach(() => {
  if (ORIGINAL_PI_BIN !== undefined) {
    process.env.PI_PROOF_TASKS_PI_BIN = ORIGINAL_PI_BIN;
    return;
  }
  delete process.env.PI_PROOF_TASKS_PI_BIN;
});
|
||||
|
||||
describe("TaskClaimDone end-to-end proof flow", () => {
|
||||
// End-to-end: a fake reviewer binary rejects the claim, so the task must stay
// pending and TaskGet must show truncated evidence plus the reviewer's judgement.
it("keeps the task open on rejected review and /lgtm-style TaskGet shows truncated evidence", async () => {
  // The fake reviewer prints a rejecting review between the expected markers.
  const reviewer = writeReviewerScript(`
    const review = {
      reviewer: "fake-judge",
      scope: "task evidence",
      rubric: {
        evidence_covers_done_criterion: { reason: "missing one artifact", pass: false },
        falsification_test_runnable: { reason: "ok", pass: true },
        failure_modes_addressed: { reason: "ok", pass: true },
        evidence_distinguishes_success: { reason: "not enough", pass: false },
        verification_hints_actionable: { reason: "ok", pass: true }
      },
      observations: ["Observed truncated proof packet"],
      concerns: ["Need stronger evidence"],
      suggestions: ["Add one more artifact"],
      blind_spots: "Did not inspect live TUI",
      missing_evidence: ["evidence_covers_done_criterion", "evidence_distinguishes_success"],
      evidence_complete: false,
      evidence_convincing: false,
      accepted: false
    };
    console.log("ROBOT_REVIEW_JSON_START");
    console.log(JSON.stringify(review));
    console.log("ROBOT_REVIEW_JSON_END");
  `);
  process.env.PI_PROOF_TASKS_PI_BIN = reviewer;

  const harness = makeHarness();
  await harness.execTool("TaskCreate", {
    subject: "Proof task",
    description: "Desc",
    done_criterion: "done",
  });

  // Use a unique per-run directory instead of a fixed filename in the shared OS
  // temp dir, so concurrent or repeated runs never read or clobber each other's file.
  const artifactDir = mkdtempSync(join(tmpdir(), "pi-proof-evidence-"));
  const artifactPath = join(artifactDir, "proof-long-evidence.log");
  const longEvidence = Array.from(
    { length: 35 },
    (_, i) => `line ${i + 1}`,
  ).join("\n");
  writeFileSync(artifactPath, longEvidence);

  const claim = await harness.execTool(
    "TaskClaimDone",
    {
      taskId: "1",
      evidence: longEvidence,
      failure_likely: "missing artifact",
      failure_sneaky: "right shape for wrong reason",
      failure_unknown: "untested provider path",
      falsification_test: "npm test\npass",
      evidence_reasoning:
        "The packet distinguishes the named failures for this test scope.",
      verification_hints: ["look at the proof log"],
      remaining_uncertainty: "Did not inspect live TUI",
      evidence_paths: [artifactPath],
    },
    { model: { provider: "openai", id: "gpt-5" } },
  );

  const claimText = claim.content[0].text;

  const taskGet = await harness.execTool("TaskGet", { taskId: "1" });
  const text = taskGet.content[0].text;

  // The claim response summarises the attempt.
  expect(claimText).toContain("## TaskClaimDone -> Task #1: Proof task");
  expect(claimText).toContain("### Metadata");
  expect(claimText).toContain("- Proof iterations: 1");
  expect(claimText).toContain("- Robot reviews: 1");

  // The task stays open and reports the rejection.
  expect(text).toContain("Status: pending");
  expect(text).toContain(
    "Gate status: latest proof review rejected the evidence; strengthen the proof and try again",
  );

  // Evidence is shown as first 8 + last 8 lines, pointing at the full artifact.
  expect(text).toContain("line 1");
  expect(text).toContain("line 8");
  expect(text).toContain("line 35");
  expect(text).not.toContain("line 9");
  expect(text).toContain("[... 19 middle lines omitted ...]");
  expect(text).toContain(
    `[truncated at 16 lines from 35; showing first 8 and last 8; full text: ${artifactPath}]`,
  );

  // The reviewer's judgement and suggestion are surfaced.
  expect(text).toContain("### Judgement");
  expect(text).toContain("Refused");
  expect(text).toContain("Needs:");
  expect(text).toContain("Add one more artifact");
});
|
||||
|
||||
// End-to-end: the fake reviewer prints markers with no JSON between them. The
// claim must complete anyway (fail-open) and keep the parse failure, including
// the raw reviewer output, in the task's rendered text.
it("completes the task fail-open on parse failure and preserves the failure note", async () => {
  const reviewer = writeReviewerScript(`
    console.log("ROBOT_REVIEW_JSON_START and nope ROBOT_REVIEW_JSON_END");
  `);
  process.env.PI_PROOF_TASKS_PI_BIN = reviewer;

  const harness = makeHarness();
  await harness.execTool("TaskCreate", {
    subject: "Proof task",
    description: "Desc",
    done_criterion: "done",
  });

  const claim = await harness.execTool(
    "TaskClaimDone",
    {
      taskId: "1",
      evidence: "short evidence",
      failure_likely: "missing artifact",
      failure_sneaky: "right shape for wrong reason",
      failure_unknown: "untested provider path",
      falsification_test: "npm test\npass",
      evidence_reasoning:
        "The packet distinguishes the named failures for this test scope.",
      verification_hints: ["look at the proof log"],
      remaining_uncertainty: "Did not inspect live TUI",
    },
    { model: { provider: "openai", id: "gpt-5" } },
  );

  const claimText = claim.content[0].text;

  const taskGet = await harness.execTool("TaskGet", { taskId: "1" });
  const text = taskGet.content[0].text;

  // The claim response reports the fail-open gate status.
  expect(claimText).toContain("## TaskClaimDone -> Task #1: Proof task");
  expect(claimText).toContain("### Metadata");
  expect(claimText).toContain(
    "- Gate status: completed with reviewer unavailable",
  );

  // The task is completed and the failure note is preserved. The original
  // asserted the "Autonomy continued" sentence twice; once is sufficient.
  expect(text).toContain("Status: completed");
  expect(text).toContain("completed with reviewer unavailable");
  expect(text).toContain("Raw output:");
  expect(text).toContain("Autonomy continued without blocking completion.");
  expect(text).not.toContain("Needs:");
  expect(text).toContain(
    "ROBOT_REVIEW_JSON_START and nope ROBOT_REVIEW_JSON_END",
  );
});
|
||||
});
|
||||
+81
-253
@@ -3,7 +3,6 @@ import { tmpdir } from "node:os";
|
||||
import { join } from "node:path";
|
||||
import { afterEach, describe, expect, it, vi } from "vitest";
|
||||
import proofTasksExtension from "../src/index.js";
|
||||
import { TaskStore } from "../src/task-store.js";
|
||||
|
||||
type RegisteredTool = {
|
||||
name: string;
|
||||
@@ -12,19 +11,14 @@ type RegisteredTool = {
|
||||
|
||||
function makeHarness() {
|
||||
const tools = new Map<string, RegisteredTool>();
|
||||
const handlers = new Map<string, Array<(...args: any[]) => any>>();
|
||||
const pi = {
|
||||
on: vi.fn((event: string, handler: (...args: any[]) => any) => {
|
||||
const existing = handlers.get(event) ?? [];
|
||||
existing.push(handler);
|
||||
handlers.set(event, existing);
|
||||
}),
|
||||
on: vi.fn(),
|
||||
events: { on: vi.fn(() => vi.fn()), emit: vi.fn() },
|
||||
registerTool: vi.fn((tool: RegisteredTool) => tools.set(tool.name, tool)),
|
||||
registerCommand: vi.fn(),
|
||||
sendMessage: vi.fn(),
|
||||
};
|
||||
|
||||
proofTasksExtension(pi as any);
|
||||
proofTasksExtension(pi as any, { ui: undefined } as any);
|
||||
|
||||
async function execTool(name: string, params: Record<string, unknown>) {
|
||||
const tool = tools.get(name);
|
||||
@@ -32,281 +26,115 @@ function makeHarness() {
|
||||
return tool.execute("tool-call", params, undefined, undefined, {});
|
||||
}
|
||||
|
||||
async function trigger(event: string, payload: any = {}, ctx: any = {}) {
|
||||
for (const handler of handlers.get(event) ?? []) {
|
||||
await handler(payload, ctx);
|
||||
}
|
||||
}
|
||||
|
||||
return { execTool, trigger };
|
||||
return { execTool };
|
||||
}
|
||||
|
||||
const tempDirs: string[] = [];
|
||||
|
||||
afterEach(() => {
|
||||
delete process.env.PI_TASKS;
|
||||
while (tempDirs.length > 0)
|
||||
rmSync(tempDirs.pop()!, { recursive: true, force: true });
|
||||
while (tempDirs.length > 0) rmSync(tempDirs.pop()!, { recursive: true, force: true });
|
||||
});
|
||||
|
||||
describe("Task tools", () => {
|
||||
it("renders a compact one-line-per-task summary", async () => {
|
||||
const harness = makeHarness();
|
||||
await harness.execTool("TaskCreate", {
|
||||
subject: "Design the flux capacitor",
|
||||
description: "Desc",
|
||||
done_criterion: "done",
|
||||
});
|
||||
await harness.execTool("TaskCreate", {
|
||||
subject: "Acquiring plutonium",
|
||||
description: "Desc",
|
||||
done_criterion: "done",
|
||||
progress_label: "Acquiring plutonium",
|
||||
});
|
||||
await harness.execTool("TaskCreate", {
|
||||
subject: "Install flux capacitor in DeLorean",
|
||||
description: "Desc",
|
||||
done_criterion: "done",
|
||||
parentId: "1",
|
||||
});
|
||||
await harness.execTool("TaskCreate", {
|
||||
subject: "Test time travel at 88 mph",
|
||||
description: "Desc",
|
||||
done_criterion: "done",
|
||||
});
|
||||
await harness.execTool("TaskCreate", { subject: "Design flux capacitor", done_criterion: "blueprint approved" });
|
||||
await harness.execTool("TaskCreate", { subject: "Get plutonium", done_criterion: "1.21 GW available" });
|
||||
await harness.execTool("TaskCreate", { subject: "Install in DeLorean", parentId: "1" });
|
||||
await harness.execTool("TaskCreate", { subject: "Simple task" });
|
||||
|
||||
await harness.execTool("TaskUpdate", { taskId: "1", status: "completed" });
|
||||
await harness.execTool("TaskUpdate", {
|
||||
taskId: "2",
|
||||
status: "in_progress",
|
||||
});
|
||||
await harness.execTool("TaskUpdate", {
|
||||
taskId: "3",
|
||||
add_blocked_by: ["1"],
|
||||
});
|
||||
await harness.execTool("TaskUpdate", {
|
||||
taskId: "4",
|
||||
add_blocked_by: ["2", "3"],
|
||||
});
|
||||
const list = await harness.execTool("TaskList", {});
|
||||
const text = list.content[0].text;
|
||||
|
||||
const result = await harness.execTool("TaskList", {});
|
||||
const text = result.content[0].text;
|
||||
|
||||
expect(text).toContain("● 4 goals (1 in progress, 3 open)");
|
||||
expect(text).toContain("◻ #1 Design the flux capacitor");
|
||||
expect(text).toContain("◼ #2 Acquiring plutonium");
|
||||
expect(text).toContain(
|
||||
"◻ #3 Install flux capacitor in DeLorean › subtask of #1 › blocked by #1",
|
||||
);
|
||||
expect(text).toContain(
|
||||
"◻ #4 Test time travel at 88 mph › blocked by #2, #3",
|
||||
);
|
||||
expect(text).not.toContain("[ACTIVE]");
|
||||
expect(text).not.toContain("[PENDING]");
|
||||
expect(text).not.toContain("[DONE");
|
||||
expect(text).not.toContain("proof claim submitted");
|
||||
expect(text).not.toContain("test:");
|
||||
// Goals get ★, subtasks and plain tasks don't
|
||||
expect(text).toContain("★ #1");
|
||||
expect(text).toContain("★ #2");
|
||||
expect(text).toContain("#3"); // subtask
|
||||
expect(text).toContain("#4"); // plain task
|
||||
});
|
||||
|
||||
it("shows TaskCreate output with metadata and compact previews", async () => {
|
||||
it("shows TaskCreate output with goal info", async () => {
|
||||
const harness = makeHarness();
|
||||
const result = await harness.execTool("TaskCreate", {
|
||||
subject: "Top-level goal",
|
||||
description: "Line 1\nLine 2\nLine 3",
|
||||
done_criterion: "observe line a\nobserve line b",
|
||||
progress_label: "Running check",
|
||||
metadata: { owner: "pi", note: "short" },
|
||||
subject: "Fix auth bug",
|
||||
done_criterion: "pytest test_auth passes",
|
||||
failure_mode: "doesn't cover expired tokens",
|
||||
});
|
||||
|
||||
const text = result.content[0].text;
|
||||
expect(text).toContain("## TaskCreate -> Task #1: Top-level goal");
|
||||
expect(text).toContain("### Metadata");
|
||||
expect(text).toContain("- Metadata keys: 2");
|
||||
expect(text).toContain("### Done criterion");
|
||||
expect(text).toContain("### Description");
|
||||
expect(text).toContain("### Progress label");
|
||||
expect(text).toContain("### Metadata preview");
|
||||
|
||||
expect(text).toContain("#1 Fix auth bug");
|
||||
expect(text).toContain("Done when: pytest test_auth passes");
|
||||
expect(text).toContain("Failure mode: doesn't cover expired tokens");
|
||||
expect(text).toContain("[goal]");
|
||||
});
|
||||
|
||||
it("shows TaskUpdate output with changed fields and previews", async () => {
|
||||
it("shows TaskCreate output for plain task", async () => {
|
||||
const harness = makeHarness();
|
||||
const result = await harness.execTool("TaskCreate", {
|
||||
subject: "Write docs",
|
||||
});
|
||||
const text = result.content[0].text;
|
||||
|
||||
expect(text).toContain("#1 Write docs");
|
||||
expect(text).toContain("[task]");
|
||||
});
|
||||
|
||||
it("shows TaskUpdate output", async () => {
|
||||
const harness = makeHarness();
|
||||
await harness.execTool("TaskCreate", { subject: "Fix bug" });
|
||||
const result = await harness.execTool("TaskUpdate", { taskId: "1", status: "in_progress" });
|
||||
const text = result.content[0].text;
|
||||
|
||||
expect(text).toContain("Updated #1 status");
|
||||
});
|
||||
|
||||
it("completes subtasks via TaskUpdate", async () => {
|
||||
const harness = makeHarness();
|
||||
await harness.execTool("TaskCreate", { subject: "Parent goal", done_criterion: "all done" });
|
||||
await harness.execTool("TaskCreate", { subject: "Subtask", parentId: "1" });
|
||||
const result = await harness.execTool("TaskUpdate", { taskId: "2", status: "completed" });
|
||||
const text = result.content[0].text;
|
||||
|
||||
expect(text).toContain("Updated #2 status");
|
||||
|
||||
const detail = await harness.execTool("TaskGet", { taskId: "2" });
|
||||
expect(detail.content[0].text).toContain("completed");
|
||||
});
|
||||
|
||||
it("completes goals via TaskComplete with evidence", async () => {
|
||||
const harness = makeHarness();
|
||||
await harness.execTool("TaskCreate", {
|
||||
subject: "Top-level goal",
|
||||
description: "Desc",
|
||||
done_criterion: "done",
|
||||
subject: "Fix auth bug",
|
||||
done_criterion: "test passes",
|
||||
});
|
||||
|
||||
const result = await harness.execTool("TaskUpdate", {
|
||||
const result = await harness.execTool("TaskComplete", {
|
||||
taskId: "1",
|
||||
status: "in_progress",
|
||||
progress_label: "Running check",
|
||||
metadata: { owner: "pi" },
|
||||
evidence: "pytest test_auth → 12/12 passed",
|
||||
failure_likely: "doesn't cover expired tokens",
|
||||
});
|
||||
|
||||
const text = result.content[0].text;
|
||||
expect(text).toContain("## TaskUpdate -> Task #1: Top-level goal");
|
||||
expect(text).toContain(
|
||||
"- Updated fields: status, progress_label, metadata",
|
||||
);
|
||||
expect(text).toContain("- status: pending -> in_progress");
|
||||
expect(text).toContain("- progress_label: (missing) -> Running check");
|
||||
expect(text).toContain("### Metadata patch");
|
||||
});
|
||||
|
||||
it("shows completed subtasks without proof-lane clutter", async () => {
|
||||
const harness = makeHarness();
|
||||
await harness.execTool("TaskCreate", {
|
||||
subject: "Top-level goal",
|
||||
description: "Desc",
|
||||
done_criterion: "done",
|
||||
});
|
||||
await harness.execTool("TaskCreate", {
|
||||
subject: "Finished checklist item",
|
||||
description: "Desc",
|
||||
done_criterion: "done",
|
||||
parentId: "1",
|
||||
});
|
||||
|
||||
await harness.execTool("TaskUpdate", { taskId: "2", status: "completed" });
|
||||
|
||||
const result = await harness.execTool("TaskList", {});
|
||||
const text = result.content[0].text;
|
||||
|
||||
expect(text).toContain("● 2 goals (1 done hidden, 1 open)");
|
||||
expect(text).toContain("◻ #1 Top-level goal");
|
||||
expect(text).not.toContain("#2 Finished checklist item");
|
||||
expect(text).not.toContain("[DONE");
|
||||
expect(text).not.toContain("proof claim submitted");
|
||||
expect(text).toContain("✓ #1 Fix auth bug");
|
||||
expect(text).toContain("Evidence: pytest test_auth → 12/12 passed");
|
||||
expect(text).toContain("Likely failure: doesn't cover expired tokens");
|
||||
|
||||
const detail = await harness.execTool("TaskGet", { taskId: "1" });
|
||||
expect(detail.content[0].text).toContain("completed");
|
||||
});
|
||||
|
||||
it("keeps persisted completed tasks on startup but hides them from the collapsed list", async () => {
|
||||
const dir = mkdtempSync(join(tmpdir(), "pi-proof-tasks-"));
|
||||
tempDirs.push(dir);
|
||||
const taskPath = join(dir, "tasks.json");
|
||||
process.env.PI_TASKS = taskPath;
|
||||
|
||||
const seeded = new TaskStore(taskPath);
|
||||
seeded.create("Finished work", "Desc", "done");
|
||||
seeded.complete("1");
|
||||
|
||||
it("shows TaskGet detail for a goal", async () => {
|
||||
const harness = makeHarness();
|
||||
await harness.trigger(
|
||||
"before_agent_start",
|
||||
{},
|
||||
{
|
||||
ui: { setWidget() {}, setStatus() {} },
|
||||
sessionManager: { getSessionId: () => "session-test" },
|
||||
},
|
||||
);
|
||||
|
||||
const result = await harness.execTool("TaskList", {});
|
||||
expect(result.content[0].text).toContain("● 1 goals (1 done hidden)");
|
||||
expect(result.content[0].text).toContain(
|
||||
"No open tasks. Completed tasks are hidden by default.",
|
||||
);
|
||||
|
||||
const reloaded = new TaskStore(taskPath);
|
||||
expect(reloaded.get("1")?.status).toBe("completed");
|
||||
});
|
||||
|
||||
it("keeps persisted completed tasks on startup even when one open goal remains", async () => {
|
||||
const dir = mkdtempSync(join(tmpdir(), "pi-proof-tasks-"));
|
||||
tempDirs.push(dir);
|
||||
const taskPath = join(dir, "tasks.json");
|
||||
process.env.PI_TASKS = taskPath;
|
||||
|
||||
const seeded = new TaskStore(taskPath);
|
||||
seeded.create("Open goal", "Desc", "done");
|
||||
seeded.create("Finished work", "Desc", "done", undefined, undefined, "1");
|
||||
seeded.complete("2");
|
||||
|
||||
const harness = makeHarness();
|
||||
await harness.trigger(
|
||||
"before_agent_start",
|
||||
{},
|
||||
{
|
||||
ui: { setWidget() {}, setStatus() {} },
|
||||
sessionManager: { getSessionId: () => "session-test" },
|
||||
},
|
||||
);
|
||||
|
||||
const result = await harness.execTool("TaskList", {});
|
||||
await harness.execTool("TaskCreate", {
|
||||
subject: "Fix auth bug",
|
||||
done_criterion: "test passes",
|
||||
failure_mode: "doesn't cover expired tokens",
|
||||
});
|
||||
const result = await harness.execTool("TaskGet", { taskId: "1" });
|
||||
const text = result.content[0].text;
|
||||
expect(text).toContain("● 2 goals (1 done hidden, 1 open)");
|
||||
expect(text).toContain("◻ #1 Open goal");
|
||||
expect(text).not.toContain("Finished work");
|
||||
|
||||
const reloaded = new TaskStore(taskPath);
|
||||
expect(reloaded.get("2")?.status).toBe("completed");
|
||||
});
|
||||
|
||||
it("keeps completed tasks persisted by default across later turns", async () => {
|
||||
const dir = mkdtempSync(join(tmpdir(), "pi-proof-tasks-"));
|
||||
tempDirs.push(dir);
|
||||
const taskPath = join(dir, "tasks.json");
|
||||
process.env.PI_TASKS = taskPath;
|
||||
|
||||
const harness = makeHarness();
|
||||
await harness.execTool("TaskCreate", {
|
||||
subject: "Persistent completed goal",
|
||||
description: "Desc",
|
||||
done_criterion: "done",
|
||||
});
|
||||
await harness.execTool("TaskCreate", {
|
||||
subject: "Checklist item",
|
||||
description: "Desc",
|
||||
done_criterion: "done",
|
||||
parentId: "1",
|
||||
});
|
||||
await harness.execTool("TaskUpdate", { taskId: "2", status: "completed" });
|
||||
|
||||
for (let turn = 0; turn < 8; turn++) {
|
||||
await harness.trigger("turn_start", {}, {
|
||||
ui: { setWidget() {}, setStatus() {} },
|
||||
sessionManager: { getSessionId: () => "session-test" },
|
||||
});
|
||||
}
|
||||
|
||||
const reloaded = new TaskStore(taskPath);
|
||||
expect(reloaded.get("2")?.status).toBe("completed");
|
||||
});
|
||||
|
||||
it("stores named PI_TASKS lists inside the repo .pi/tasks directory", async () => {
|
||||
process.env.PI_TASKS = `named-${Date.now()}`;
|
||||
const expectedPath = join(
|
||||
process.cwd(),
|
||||
".pi",
|
||||
"tasks",
|
||||
`${process.env.PI_TASKS}.json`,
|
||||
);
|
||||
try {
|
||||
rmSync(expectedPath);
|
||||
} catch {}
|
||||
try {
|
||||
rmSync(expectedPath + ".lock");
|
||||
} catch {}
|
||||
try {
|
||||
rmSync(expectedPath + ".tmp");
|
||||
} catch {}
|
||||
|
||||
const harness = makeHarness();
|
||||
await harness.execTool("TaskCreate", {
|
||||
subject: "Repo local task",
|
||||
description: "Desc",
|
||||
done_criterion: "done",
|
||||
});
|
||||
|
||||
const reloaded = new TaskStore(expectedPath);
|
||||
expect(reloaded.get("1")?.subject).toBe("Repo local task");
|
||||
|
||||
try {
|
||||
rmSync(expectedPath);
|
||||
} catch {}
|
||||
try {
|
||||
rmSync(expectedPath + ".lock");
|
||||
} catch {}
|
||||
try {
|
||||
rmSync(expectedPath + ".tmp");
|
||||
} catch {}
|
||||
expect(text).toContain("#1 Fix auth bug");
|
||||
expect(text).toContain("Status: pending");
|
||||
expect(text).toContain("Done when: test passes");
|
||||
expect(text).toContain("Failure mode: doesn't cover expired tokens");
|
||||
});
|
||||
});
|
||||
|
||||
+61
-63
@@ -6,7 +6,7 @@ import { TaskStore } from "../src/task-store.js";
|
||||
|
||||
// Helper: create a subtask, which can be ticked off directly.
|
||||
function createSubtask(store: TaskStore, subject: string) {
|
||||
const parent = store.create(`${subject} parent`, "Desc", "done criterion");
|
||||
const parent = store.create(`${subject} parent`, "done criterion");
|
||||
return store.create(
|
||||
subject,
|
||||
"Desc",
|
||||
@@ -25,19 +25,19 @@ describe("TaskStore (in-memory)", () => {
|
||||
});
|
||||
|
||||
it("creates tasks with auto-incrementing IDs", () => {
|
||||
const t1 = store.create("First task", "Description 1", "criterion 1");
|
||||
const t2 = store.create("Second task", "Description 2", "criterion 2");
|
||||
const t1 = store.create("First task", "criterion 1");
|
||||
const t2 = store.create("Second task", "criterion 2");
|
||||
|
||||
expect(t1.id).toBe("1");
|
||||
expect(t2.id).toBe("2");
|
||||
expect(t1.status).toBe("pending");
|
||||
expect(t1.subject).toBe("First task");
|
||||
expect(t1.description).toBe("Description 1");
|
||||
expect(t1.done_criterion).toBe("criterion 1");
|
||||
expect(t1.done_criterion).toBe("criterion 1");
|
||||
});
|
||||
|
||||
it("creates tasks with optional fields", () => {
|
||||
const t = store.create("Task", "Desc", "done criterion", "Running task", {
|
||||
const t = store.create("Task", "done criterion", undefined, "Running task", {
|
||||
key: "value",
|
||||
});
|
||||
|
||||
@@ -46,7 +46,7 @@ describe("TaskStore (in-memory)", () => {
|
||||
});
|
||||
|
||||
it("gets a task by ID", () => {
|
||||
store.create("Test", "Desc", "done");
|
||||
store.create("Test", "done");
|
||||
const task = store.get("1");
|
||||
|
||||
expect(task).toBeDefined();
|
||||
@@ -58,16 +58,16 @@ describe("TaskStore (in-memory)", () => {
|
||||
});
|
||||
|
||||
it("lists all tasks sorted by ID", () => {
|
||||
store.create("Task 3", "Desc", "done");
|
||||
store.create("Task 1", "Desc", "done");
|
||||
store.create("Task 2", "Desc", "done");
|
||||
store.create("Task 3", "done");
|
||||
store.create("Task 1", "done");
|
||||
store.create("Task 2", "done");
|
||||
|
||||
const tasks = store.list();
|
||||
expect(tasks.map((t) => t.id)).toEqual(["1", "2", "3"]);
|
||||
});
|
||||
|
||||
it("updates task status", () => {
|
||||
store.create("Test", "Desc", "done");
|
||||
store.create("Test", "done");
|
||||
const { task, changedFields } = store.update("1", {
|
||||
status: "in_progress",
|
||||
});
|
||||
@@ -77,7 +77,7 @@ describe("TaskStore (in-memory)", () => {
|
||||
});
|
||||
|
||||
it("updates multiple fields at once", () => {
|
||||
store.create("Test", "Desc", "done");
|
||||
store.create("Test", "done");
|
||||
const { changedFields } = store.update("1", {
|
||||
subject: "Updated subject",
|
||||
description: "Updated desc",
|
||||
@@ -94,7 +94,7 @@ describe("TaskStore (in-memory)", () => {
|
||||
});
|
||||
|
||||
it("deletes a task with status: deleted", () => {
|
||||
store.create("Test", "Desc", "done");
|
||||
store.create("Test", "done");
|
||||
const { changedFields } = store.update("1", { status: "deleted" });
|
||||
|
||||
expect(changedFields).toEqual(["deleted"]);
|
||||
@@ -103,16 +103,16 @@ describe("TaskStore (in-memory)", () => {
|
||||
});
|
||||
|
||||
it("preserves ID counter after deletion", () => {
|
||||
store.create("Task 1", "Desc", "done");
|
||||
store.create("Task 2", "Desc", "done");
|
||||
store.create("Task 1", "done");
|
||||
store.create("Task 2", "done");
|
||||
store.update("1", { status: "deleted" });
|
||||
|
||||
const t3 = store.create("Task 3", "Desc", "done");
|
||||
const t3 = store.create("Task 3", "done");
|
||||
expect(t3.id).toBe("3"); // Not "1" — counter continues
|
||||
});
|
||||
|
||||
it("merges metadata with null key deletion", () => {
|
||||
store.create("Test", "Desc", "done", undefined, { a: 1, b: 2, c: 3 });
|
||||
store.create("Test", "done", undefined, undefined, { a: 1, b: 2, c: 3 });
|
||||
store.update("1", { metadata: { b: null, d: 4 } });
|
||||
|
||||
const task = store.get("1")!;
|
||||
@@ -120,8 +120,8 @@ describe("TaskStore (in-memory)", () => {
|
||||
});
|
||||
|
||||
it("sets up bidirectional blocks via add_blocks", () => {
|
||||
store.create("Blocker", "Desc", "done");
|
||||
store.create("Blocked", "Desc", "done");
|
||||
store.create("Blocker", "done");
|
||||
store.create("Blocked", "done");
|
||||
|
||||
store.update("1", { add_blocks: ["2"] });
|
||||
|
||||
@@ -132,8 +132,8 @@ describe("TaskStore (in-memory)", () => {
|
||||
});
|
||||
|
||||
it("sets up bidirectional blocks via add_blocked_by", () => {
|
||||
store.create("Blocker", "Desc", "done");
|
||||
store.create("Blocked", "Desc", "done");
|
||||
store.create("Blocker", "done");
|
||||
store.create("Blocked", "done");
|
||||
|
||||
store.update("2", { add_blocked_by: ["1"] });
|
||||
|
||||
@@ -144,8 +144,8 @@ describe("TaskStore (in-memory)", () => {
|
||||
});
|
||||
|
||||
it("does not duplicate dependency edges", () => {
|
||||
store.create("A", "Desc", "done");
|
||||
store.create("B", "Desc", "done");
|
||||
store.create("A", "done");
|
||||
store.create("B", "done");
|
||||
|
||||
store.update("1", { add_blocks: ["2"] });
|
||||
store.update("1", { add_blocks: ["2"] }); // duplicate
|
||||
@@ -155,8 +155,8 @@ describe("TaskStore (in-memory)", () => {
|
||||
});
|
||||
|
||||
it("cleans up dependency edges on deletion", () => {
|
||||
store.create("A", "Desc", "done");
|
||||
store.create("B", "Desc", "done");
|
||||
store.create("A", "done");
|
||||
store.create("B", "done");
|
||||
store.update("1", { add_blocks: ["2"] });
|
||||
|
||||
store.update("1", { status: "deleted" });
|
||||
@@ -166,8 +166,8 @@ describe("TaskStore (in-memory)", () => {
|
||||
});
|
||||
|
||||
it("clears completed tasks", () => {
|
||||
store.create("Completed", "Desc", "done");
|
||||
store.create("Pending", "Desc", "done");
|
||||
store.create("Completed", "done");
|
||||
store.create("Pending", "done");
|
||||
store.complete("1");
|
||||
|
||||
const count = store.clearCompleted();
|
||||
@@ -184,24 +184,22 @@ describe("TaskStore (in-memory)", () => {
|
||||
expect(changedFields).toContain("status");
|
||||
});
|
||||
|
||||
it("blocks TaskUpdate(status=completed) for top-level tasks", () => {
|
||||
store.create("Goal", "Desc", "done");
|
||||
expect(() => store.update("1", { status: "completed" })).toThrow(
|
||||
"Top-level task #1 requires proof",
|
||||
);
|
||||
it("allows TaskUpdate(status=completed) for top-level tasks (no proof gate)", () => {
|
||||
store.create("Goal", "done");
|
||||
const { task } = store.update("1", { status: "completed" });
|
||||
expect(task?.status).toBe("completed");
|
||||
});
|
||||
|
||||
it("keeps top-level completion gated even after proof evidence exists", () => {
|
||||
store.create("Escalated", "Desc", "done");
|
||||
it("allows top-level completion via TaskUpdate (evidence is for TaskComplete)", () => {
|
||||
store.create("Escalated", "done");
|
||||
store.update("1", { metadata: { lgtm_evidence: "literal output" } });
|
||||
expect(() => store.update("1", { status: "completed" })).toThrow(
|
||||
"TaskClaimDone",
|
||||
);
|
||||
const { task } = store.update("1", { status: "completed" });
|
||||
expect(task?.status).toBe("completed");
|
||||
});
|
||||
|
||||
it("rejects changing parentId after creation", () => {
|
||||
store.create("Parent", "Desc", "done");
|
||||
store.create("Child", "Desc", "done");
|
||||
store.create("Parent", "done");
|
||||
store.create("Child", "done");
|
||||
expect(() => store.update("2", { parentId: "1" })).toThrow(
|
||||
"parentId is creation-only",
|
||||
);
|
||||
@@ -216,7 +214,7 @@ describe("TaskStore (in-memory)", () => {
|
||||
});
|
||||
|
||||
it("complete() is the internal proof-review completion path", () => {
|
||||
store.create("Test", "Desc", "done");
|
||||
store.create("Test", "done");
|
||||
const task = store.complete("1");
|
||||
expect(task.status).toBe("completed");
|
||||
});
|
||||
@@ -232,7 +230,7 @@ describe("TaskStore (in-memory)", () => {
|
||||
});
|
||||
|
||||
it("delete method works", () => {
|
||||
store.create("Test", "Desc", "done");
|
||||
store.create("Test", "done");
|
||||
expect(store.delete("1")).toBe(true);
|
||||
expect(store.delete("1")).toBe(false); // already deleted
|
||||
expect(store.list()).toHaveLength(0);
|
||||
@@ -250,8 +248,8 @@ describe("TaskStore (in-memory)", () => {
|
||||
});
|
||||
|
||||
it("allows circular dependencies with warning", () => {
|
||||
store.create("A", "Desc", "done");
|
||||
store.create("B", "Desc", "done");
|
||||
store.create("A", "done");
|
||||
store.create("B", "done");
|
||||
store.update("1", { add_blocks: ["2"] });
|
||||
const { warnings } = store.update("2", { add_blocks: ["1"] });
|
||||
|
||||
@@ -261,33 +259,33 @@ describe("TaskStore (in-memory)", () => {
|
||||
});
|
||||
|
||||
it("allows self-dependency with warning", () => {
|
||||
store.create("Self", "Desc", "done");
|
||||
store.create("Self", "done");
|
||||
const { warnings } = store.update("1", { add_blocks: ["1"] });
|
||||
expect(store.get("1")!.blocks).toContain("1");
|
||||
expect(warnings).toContain("#1 blocks itself");
|
||||
});
|
||||
|
||||
it("stores dangling edge IDs with warning", () => {
|
||||
store.create("Real", "Desc", "done");
|
||||
store.create("Real", "done");
|
||||
const { warnings } = store.update("1", { add_blocks: ["9999"] });
|
||||
expect(store.get("1")!.blocks).toContain("9999");
|
||||
expect(warnings).toContain("#9999 does not exist");
|
||||
});
|
||||
|
||||
it("returns no warnings for valid dependencies", () => {
|
||||
store.create("A", "Desc", "done");
|
||||
store.create("B", "Desc", "done");
|
||||
store.create("A", "done");
|
||||
store.create("B", "done");
|
||||
const { warnings } = store.update("1", { add_blocks: ["2"] });
|
||||
expect(warnings).toEqual([]);
|
||||
});
|
||||
|
||||
it("accepts whitespace-only subjects (matches Claude Code)", () => {
|
||||
const t = store.create(" ", "Desc", "done");
|
||||
const t = store.create(" ", "done");
|
||||
expect(t.subject).toBe(" ");
|
||||
});
|
||||
|
||||
it("updates progress_label field", () => {
|
||||
store.create("Test", "Desc", "done");
|
||||
store.create("Test", "done");
|
||||
const { changedFields } = store.update("1", {
|
||||
progress_label: "Running tests",
|
||||
});
|
||||
@@ -305,7 +303,7 @@ describe("TaskStore (in-memory)", () => {
|
||||
});
|
||||
|
||||
it("updates done_criterion field", () => {
|
||||
store.create("Test", "Desc", "original criterion");
|
||||
store.create("Test", "original criterion");
|
||||
const { changedFields } = store.update("1", {
|
||||
done_criterion: "updated criterion",
|
||||
});
|
||||
@@ -323,8 +321,8 @@ describe("TaskStore (in-memory)", () => {
|
||||
});
|
||||
|
||||
it("clearCompleted cleans up dependency edges", () => {
|
||||
store.create("Blocker", "Desc", "done");
|
||||
store.create("Blocked", "Desc", "done");
|
||||
store.create("Blocker", "done");
|
||||
store.create("Blocked", "done");
|
||||
store.update("1", { add_blocks: ["2"] });
|
||||
// complete() is the internal proof-review completion path.
|
||||
store.complete("1");
|
||||
@@ -336,9 +334,9 @@ describe("TaskStore (in-memory)", () => {
|
||||
});
|
||||
|
||||
it("handles multiple add_blocks in one call", () => {
|
||||
store.create("Blocker", "Desc", "done");
|
||||
store.create("B1", "Desc", "done");
|
||||
store.create("B2", "Desc", "done");
|
||||
store.create("Blocker", "done");
|
||||
store.create("B1", "done");
|
||||
store.create("B2", "done");
|
||||
|
||||
store.update("1", { add_blocks: ["2", "3"] });
|
||||
|
||||
@@ -348,37 +346,37 @@ describe("TaskStore (in-memory)", () => {
|
||||
});
|
||||
|
||||
it("add_blocked_by warns on self-dependency", () => {
|
||||
store.create("Self", "Desc", "done");
|
||||
store.create("Self", "done");
|
||||
const { warnings } = store.update("1", { add_blocked_by: ["1"] });
|
||||
expect(store.get("1")!.blockedBy).toContain("1");
|
||||
expect(warnings).toContain("#1 blocks itself");
|
||||
});
|
||||
|
||||
it("add_blocked_by warns on dangling ref", () => {
|
||||
store.create("Real", "Desc", "done");
|
||||
store.create("Real", "done");
|
||||
const { warnings } = store.update("1", { add_blocked_by: ["9999"] });
|
||||
expect(store.get("1")!.blockedBy).toContain("9999");
|
||||
expect(warnings).toContain("#9999 does not exist");
|
||||
});
|
||||
|
||||
it("add_blocked_by warns on cycle", () => {
|
||||
store.create("A", "Desc", "done");
|
||||
store.create("B", "Desc", "done");
|
||||
store.create("A", "done");
|
||||
store.create("B", "done");
|
||||
store.update("1", { add_blocks: ["2"] });
|
||||
const { warnings } = store.update("1", { add_blocked_by: ["2"] });
|
||||
expect(warnings).toContain("cycle: #1 and #2 block each other");
|
||||
});
|
||||
|
||||
it("clearCompleted returns 0 when no completed tasks", () => {
|
||||
store.create("Pending", "Desc", "done");
|
||||
store.create("Pending", "done");
|
||||
expect(store.clearCompleted()).toBe(0);
|
||||
});
|
||||
|
||||
it("list sorts pending → in_progress → completed with all three present", () => {
|
||||
store.create("Pending task", "Desc", "done");
|
||||
store.create("Completed task", "Desc", "done");
|
||||
store.create("In-progress task", "Desc", "done");
|
||||
store.create("Another pending", "Desc", "done");
|
||||
store.create("Pending task", "done");
|
||||
store.create("Completed task", "done");
|
||||
store.create("In-progress task", "done");
|
||||
store.create("Another pending", "done");
|
||||
|
||||
store.complete("2");
|
||||
store.update("3", { status: "in_progress" });
|
||||
|
||||
Reference in New Issue
Block a user