simplify proof logs and keep tasks repo-local

This commit is contained in:
wassname
2026-06-14 20:09:30 +08:00
parent 29c928c805
commit c0ceb95ea4
18 changed files with 5496 additions and 3683 deletions
+8 -9
View File
@@ -13,7 +13,7 @@ A [pi](https://pi.dev) extension that adds proof-gated top-level tasks to task t
The core idea: subtasks are normal checklist items, but top-level tasks are goals. Agents cannot mark top-level tasks complete directly. They must call `TaskClaimDone` with auditable evidence, UAT hints, and explicit failure-mode analysis. A fresh judge then accepts or rejects the claim. Accepted review completes the task; rejected review leaves it open with suggestions. The core idea: subtasks are normal checklist items, but top-level tasks are goals. Agents cannot mark top-level tasks complete directly. They must call `TaskClaimDone` with auditable evidence, UAT hints, and explicit failure-mode analysis. A fresh judge then accepts or rejects the claim. Accepted review completes the task; rejected review leaves it open with suggestions.
Humans can use `/lgtm` to view the proof log and sanity-check the reviewer notes later. `/lgtm` is intentionally thin: proof viewing lives there, task management stays in `/tasks`. Humans can use `/lgtm` to view the proof log and sanity-check the reviewer notes later. `/lgtm` is intentionally thin: proof viewing lives there, task management stays in `/tasks`. Long submitted-evidence blocks are previewed inline and truncated after about 16 lines, with the full artifact path shown in the proof log.
## Install ## Install
@@ -44,9 +44,8 @@ Stripped: `TaskExecute`, `TaskOutput`, `TaskStop`, `process-tracker.ts`, subagen
## Widget ## Widget
``` ```
● 3 tasks (1 done, 1 in progress, 1 open) ● 3 goals (1 done hidden, 1 in progress, 1 open)
#1 Design schema #2 Implementing cache layer… (2m 49s, ↑ 4.1k ↓ 1.2k)
✳ #2 Implementing cache layer… (2m 49s · ↑ 4.1k ↓ 1.2k)
◻ #3 Load test ◻ #3 Load test
``` ```
@@ -142,9 +141,9 @@ Interactive task-management menu: view tasks, create task, delete a selected tas
```text ```text
Top-level task: Top-level task:
pending -> in_progress -> TaskClaimDone pending -> in_progress -> TaskClaimDone
-> current evidence iteration N 🛠 -> current evidence iteration N
-> robot review iteration(s) 🤖 -> robot review iteration(s)
-> completed if latest robot review accepts -> completed if latest robot review accepts
-> remains open if reviewer rejects -> remains open if reviewer rejects
-> completed if reviewer infrastructure fails (fail-open, note logged) -> completed if reviewer infrastructure fails (fail-open, note logged)
-> lgtm_supersede or newer TaskClaimDone -> superseded history + fresh current evidence -> lgtm_supersede or newer TaskClaimDone -> superseded history + fresh current evidence
@@ -168,7 +167,7 @@ Override via env:
```bash ```bash
PI_TASKS=off # in-memory (CI) PI_TASKS=off # in-memory (CI)
PI_TASKS=sprint-1 # named shared list at ~/.pi/tasks/sprint-1.json PI_TASKS=sprint-1 # named project-local list at .pi/tasks/sprint-1.json
PI_TASKS=/abs/path # explicit path PI_TASKS=/abs/path # explicit path
PI_TASKS_DEBUG=1 # trace to stderr PI_TASKS_DEBUG=1 # trace to stderr
``` ```
@@ -178,7 +177,7 @@ PI_TASKS_DEBUG=1 # trace to stderr
```text ```text
src/ src/
├── index.ts # tools + /tasks + /lgtm evidence viewer + widget + event handlers ├── index.ts # tools + /tasks + /lgtm evidence viewer + widget + event handlers
├── review-badges.ts # Review badge helpers for evidence/review/completion lanes ├── review-badges.ts # Review state helpers for proof/completion lanes
├── robot-review.ts # Robot review iteration storage + compatibility helpers ├── robot-review.ts # Robot review iteration storage + compatibility helpers
├── types.ts # Task, TaskStatus types ├── types.ts # Task, TaskStatus types
├── task-store.ts # File-backed store with CRUD, locking, complete() method ├── task-store.ts # File-backed store with CRUD, locking, complete() method
+70 -66
View File
@@ -13,79 +13,83 @@ import type { TaskStore } from "./task-store.js";
export type AutoClearMode = "never" | "on_list_complete" | "on_task_complete"; export type AutoClearMode = "never" | "on_list_complete" | "on_task_complete";
export class AutoClearManager { export class AutoClearManager {
/** Per-task: turn when task was marked completed ("on_task_complete" mode). */ /** Per-task: turn when task was marked completed ("on_task_complete" mode). */
private completedAtTurn = new Map<string, number>(); private completedAtTurn = new Map<string, number>();
/** Turn when ALL tasks became completed ("on_list_complete" mode). */ /** Turn when ALL tasks became completed ("on_list_complete" mode). */
private allCompletedAtTurn: number | null = null; private allCompletedAtTurn: number | null = null;
constructor( constructor(
private getStore: () => TaskStore, private getStore: () => TaskStore,
private getMode: () => AutoClearMode, private getMode: () => AutoClearMode,
/** How many turns completed tasks linger before auto-clearing. */ /** How many turns completed tasks linger before auto-clearing. */
private clearDelayTurns = 4, private clearDelayTurns = 4,
) {} ) {}
/** Record a task completion. Call AFTER cascade logic. */ /** Record a task completion. Call AFTER cascade logic. */
trackCompletion(taskId: string, currentTurn: number): void { trackCompletion(taskId: string, currentTurn: number): void {
const mode = this.getMode(); const mode = this.getMode();
if (mode === "never") return; if (mode === "never") return;
if (mode === "on_task_complete") { if (mode === "on_task_complete") {
this.completedAtTurn.set(taskId, currentTurn); this.completedAtTurn.set(taskId, currentTurn);
} else if (mode === "on_list_complete") { } else if (mode === "on_list_complete") {
this.checkAllCompleted(currentTurn); this.checkAllCompleted(currentTurn);
} }
} }
/** Check if all tasks are completed and start/reset the batch countdown. */ /** Check if all tasks are completed and start/reset the batch countdown. */
private checkAllCompleted(currentTurn: number): void { private checkAllCompleted(currentTurn: number): void {
const tasks = this.getStore().list(); const tasks = this.getStore().list();
if (tasks.length > 0 && tasks.every(t => t.status === "completed")) { if (tasks.length > 0 && tasks.every((t) => t.status === "completed")) {
if (this.allCompletedAtTurn === null) this.allCompletedAtTurn = currentTurn; if (this.allCompletedAtTurn === null)
} else { this.allCompletedAtTurn = currentTurn;
this.allCompletedAtTurn = null; } else {
} this.allCompletedAtTurn = null;
} }
}
/** Reset batch countdown (e.g., when a new task is created or task goes non-completed). */ /** Reset batch countdown (e.g., when a new task is created or task goes non-completed). */
resetBatchCountdown(): void { resetBatchCountdown(): void {
this.allCompletedAtTurn = null; this.allCompletedAtTurn = null;
} }
/** Reset all tracking state (e.g., on new session). */ /** Reset all tracking state (e.g., on new session). */
reset(): void { reset(): void {
this.completedAtTurn.clear(); this.completedAtTurn.clear();
this.allCompletedAtTurn = null; this.allCompletedAtTurn = null;
} }
/** /**
* Called on each turn start. Deletes tasks whose linger period has expired. * Called on each turn start. Deletes tasks whose linger period has expired.
* Returns true if any tasks were cleared. * Returns true if any tasks were cleared.
*/ */
onTurnStart(currentTurn: number): boolean { onTurnStart(currentTurn: number): boolean {
const mode = this.getMode(); const mode = this.getMode();
let cleared = false; let cleared = false;
if (mode === "on_task_complete") { if (mode === "on_task_complete") {
for (const [taskId, turn] of this.completedAtTurn) { for (const [taskId, turn] of this.completedAtTurn) {
const task = this.getStore().get(taskId); const task = this.getStore().get(taskId);
if (!task || task.status !== "completed") { if (!task || task.status !== "completed") {
// Task was deleted or reverted — drop stale tracking entry // Task was deleted or reverted — drop stale tracking entry
this.completedAtTurn.delete(taskId); this.completedAtTurn.delete(taskId);
} else if (currentTurn - turn >= this.clearDelayTurns) { } else if (currentTurn - turn >= this.clearDelayTurns) {
this.getStore().delete(taskId); this.getStore().delete(taskId);
this.completedAtTurn.delete(taskId); this.completedAtTurn.delete(taskId);
cleared = true; cleared = true;
} }
} }
} else if (mode === "on_list_complete" && this.allCompletedAtTurn !== null) { } else if (
if (currentTurn - this.allCompletedAtTurn >= this.clearDelayTurns) { mode === "on_list_complete" &&
this.getStore().clearCompleted(); this.allCompletedAtTurn !== null
this.allCompletedAtTurn = null; ) {
cleared = true; if (currentTurn - this.allCompletedAtTurn >= this.clearDelayTurns) {
} this.getStore().clearCompleted();
} this.allCompletedAtTurn = null;
cleared = true;
}
}
return cleared; return cleared;
} }
} }
+2174 -1324
View File
File diff suppressed because it is too large Load Diff
+63 -76
View File
@@ -1,99 +1,86 @@
import { getLatestRobotReview, getRobotReviews } from "./robot-review.js"; import { getLatestRobotReview } from "./robot-review.js";
import type { Task } from "./types.js"; import type { Task } from "./types.js";
const STAGES = ["🛠", "🤖", "✓"] as const;
function hasCurrentEvidence(task: Task): boolean { function hasCurrentEvidence(task: Task): boolean {
return typeof task.metadata?.lgtm_evidence === "string" && task.metadata.lgtm_evidence.length > 0; return (
typeof task.metadata?.lgtm_evidence === "string" &&
task.metadata.lgtm_evidence.length > 0
);
} }
function hasEvidenceHistory(task: Task): boolean { function hasEvidenceHistory(task: Task): boolean {
return Array.isArray(task.metadata?.lgtm_history) && task.metadata.lgtm_history.length > 0; return (
Array.isArray(task.metadata?.lgtm_history) &&
task.metadata.lgtm_history.length > 0
);
} }
/** Pipeline stages: `[🛠·🤖·✓]` fills left-to-right as evidence→review→completed progresses. */
export function getReviewBadges(task: Task): string {
const filled = [
!!task.metadata?.lgtm_evidence,
getRobotReviews(task).length > 0,
task.status === "completed",
];
const slots = STAGES.map((emoji, i) => filled[i] ? emoji : "·");
return `[${slots.join("")}]`;
}
export const REVIEW_BADGES = {
evidence: STAGES[0],
robot: STAGES[1],
complete: STAGES[2],
pipeline: STAGES,
};
export type DisplayStatus = "in_progress" | "pending" | "completed"; export type DisplayStatus = "in_progress" | "pending" | "completed";
export function getDisplayStatus(task: Task): DisplayStatus { export function getDisplayStatus(task: Task): DisplayStatus {
return task.status; return task.status;
} }
export type CompletionMode = "direct" | "proof"; export type CompletionMode = "direct" | "proof";
export type ReviewState = export type ReviewState =
| "no_claim" | "no_claim"
| "claim_submitted" | "claim_submitted"
| "reviewer_failed_to_run" | "reviewer_failed_to_run"
| "reviewer_rejected" | "reviewer_rejected"
| "reviewer_accepted" | "reviewer_accepted"
| "superseded" | "superseded"
| "completed"; | "completed";
export type StateTag = "ACTIVE" | "PENDING" | "DONE";
export function getCompletionMode(task: Task): CompletionMode { export function getCompletionMode(task: Task): CompletionMode {
return task.parentId ? "direct" : "proof"; return task.parentId ? "direct" : "proof";
} }
export function getReviewState(task: Task): ReviewState { export function getReviewState(task: Task): ReviewState {
if (task.status === "completed") return "completed"; if (task.status === "completed") return "completed";
const latest = getLatestRobotReview(task); const latest = getLatestRobotReview(task);
if (latest && !latest.accepted) return "reviewer_rejected"; if (latest && !latest.accepted) return "reviewer_rejected";
if (latest?.accepted) return "reviewer_accepted"; if (latest?.accepted) return "reviewer_accepted";
if (typeof task.metadata?.robot_review_last_error === "string") return "reviewer_failed_to_run"; if (typeof task.metadata?.robot_review_last_error === "string")
if (hasCurrentEvidence(task)) return "claim_submitted"; return "reviewer_failed_to_run";
if (hasEvidenceHistory(task)) return "superseded"; if (hasCurrentEvidence(task)) return "claim_submitted";
return "no_claim"; if (hasEvidenceHistory(task)) return "superseded";
return "no_claim";
}
export function needsProofAttention(task: Task): boolean {
if (task.parentId || task.status === "completed") return false;
const state = getReviewState(task);
return (
state === "reviewer_rejected" ||
state === "reviewer_accepted" ||
state === "reviewer_failed_to_run"
);
} }
export function getGateStatus(task: Task): string { export function getGateStatus(task: Task): string {
const state = getReviewState(task); const state = getReviewState(task);
if (task.parentId) { if (task.parentId) {
return task.status === "completed" ? "completed directly as subtask" : "subtask: direct completion allowed"; return task.status === "completed"
} ? "completed directly as subtask"
if (task.status === "completed") { : "subtask: direct completion allowed";
if (typeof task.metadata?.robot_review_last_error === "string") { }
return `completed with reviewer unavailable: ${task.metadata.robot_review_last_error}`; if (task.status === "completed") {
} if (typeof task.metadata?.robot_review_last_error === "string") {
if (getLatestRobotReview(task)?.accepted) return "completed after accepted proof review"; return `completed with reviewer unavailable: ${task.metadata.robot_review_last_error}`;
return "completed"; }
} if (getLatestRobotReview(task)?.accepted)
if (state === "no_claim") return "top-level task requires TaskClaimDone evidence before completion"; return "completed after accepted proof review";
if (state === "reviewer_accepted") return "review accepted; task should be completed"; return "completed";
if (state === "reviewer_failed_to_run") { }
return `review unavailable; autonomy continues: ${task.metadata.robot_review_last_error}`; if (state === "no_claim")
} return "top-level task requires TaskClaimDone evidence before completion";
if (state === "reviewer_rejected") return "latest proof review rejected the evidence; strengthen the proof and try again"; if (state === "reviewer_accepted")
if (state === "superseded") return "current evidence superseded, waiting for a new proof claim"; return "review accepted; task should be completed";
return "proof claim submitted, automatic review still required"; if (state === "reviewer_failed_to_run") {
} return `review unavailable; autonomy continues: ${task.metadata.robot_review_last_error}`;
}
/** Short uppercase tag for compact task-list display. */ if (state === "reviewer_rejected")
export function getStateTag(task: Task): StateTag { return "latest proof review rejected the evidence; strengthen the proof and try again";
const s = getDisplayStatus(task); if (state === "superseded")
if (s === "completed") return "DONE"; return "current evidence superseded, waiting for a new proof claim";
if (s === "in_progress") return "ACTIVE"; return "proof claim submitted, automatic review still required";
return "PENDING";
}
/** Theme colour key for each state tag (only theme colours present in pi-tui are used). */
export function getStateTagColor(tag: StateTag): "accent" | "dim" | undefined {
if (tag === "ACTIVE") return "accent";
if (tag === "DONE") return "dim";
return undefined; // PENDING — default fg
} }
+267 -151
View File
@@ -3,185 +3,301 @@ import type { Task } from "./types.js";
export type RobotReviewMode = "manual" | "auto"; export type RobotReviewMode = "manual" | "auto";
export interface RobotReviewRecord { export interface RobotReviewRecord {
iteration: number; iteration: number;
reviewer: string; reviewer: string;
scope: string; scope: string;
observations: string[]; observations: string[];
concerns: string[]; concerns: string[];
suggestions: string[]; suggestions: string[];
blind_spots: string; blind_spots: string;
accepted: boolean; accepted: boolean;
evidence_complete: boolean; evidence_complete: boolean;
evidence_convincing: boolean; evidence_convincing: boolean;
missing_evidence: string[]; missing_evidence: string[];
submitted_at: string; submitted_at: string;
mode: RobotReviewMode; mode: RobotReviewMode;
raw_output?: string; raw_output?: string;
rubric?: Record<string, { reason: string; pass: boolean }>; rubric?: Record<string, { reason: string; pass: boolean }>;
} }
function toStringArray(value: unknown): string[] { function toStringArray(value: unknown): string[] {
return Array.isArray(value) ? value.filter((item): item is string => typeof item === "string") : []; return Array.isArray(value)
? value.filter((item): item is string => typeof item === "string")
: [];
} }
function extractRubric(value: unknown): Record<string, { reason: string; pass: boolean }> | undefined { function extractRubric(
if (!value || typeof value !== "object") return undefined; value: unknown,
const r: Record<string, { reason: string; pass: boolean }> = {}; ): Record<string, { reason: string; pass: boolean }> | undefined {
for (const [key, val] of Object.entries(value as Record<string, unknown>)) { if (!value || typeof value !== "object") return undefined;
if (val && typeof val === "object" && "reason" in (val as any) && "pass" in (val as any)) { const r: Record<string, { reason: string; pass: boolean }> = {};
const v = val as { reason: unknown; pass: unknown }; for (const [key, val] of Object.entries(value as Record<string, unknown>)) {
r[key] = { reason: typeof v.reason === "string" ? v.reason : "", pass: v.pass === true }; if (
} val &&
} typeof val === "object" &&
return Object.keys(r).length > 0 ? r : undefined; "reason" in (val as any) &&
"pass" in (val as any)
) {
const v = val as { reason: unknown; pass: unknown };
r[key] = {
reason: typeof v.reason === "string" ? v.reason : "",
pass: v.pass === true,
};
}
}
return Object.keys(r).length > 0 ? r : undefined;
} }
function normalizeReview(value: unknown, index: number): RobotReviewRecord | undefined { function normalizeReview(
if (!value || typeof value !== "object") return undefined; value: unknown,
const review = value as Record<string, unknown>; index: number,
const reviewer = typeof review.reviewer === "string" ? review.reviewer : "unknown"; ): RobotReviewRecord | undefined {
const scope = typeof review.scope === "string" ? review.scope : "unknown"; if (!value || typeof value !== "object") return undefined;
const observations = toStringArray(review.observations); const review = value as Record<string, unknown>;
if (observations.length === 0) return undefined; const reviewer =
return { typeof review.reviewer === "string" ? review.reviewer : "unknown";
iteration: typeof review.iteration === "number" ? review.iteration : index + 1, const scope = typeof review.scope === "string" ? review.scope : "unknown";
reviewer, const observations = toStringArray(review.observations);
scope, if (observations.length === 0) return undefined;
observations, return {
concerns: toStringArray(review.concerns), iteration:
suggestions: toStringArray(review.suggestions), typeof review.iteration === "number" ? review.iteration : index + 1,
blind_spots: typeof review.blind_spots === "string" ? review.blind_spots : "not recorded", reviewer,
accepted: typeof review.accepted === "boolean" scope,
? review.accepted observations,
: (typeof review.evidence_complete === "boolean" ? review.evidence_complete : true) concerns: toStringArray(review.concerns),
&& (typeof review.evidence_convincing === "boolean" ? review.evidence_convincing : true), suggestions: toStringArray(review.suggestions),
evidence_complete: typeof review.evidence_complete === "boolean" ? review.evidence_complete : true, blind_spots:
evidence_convincing: typeof review.evidence_convincing === "boolean" ? review.evidence_convincing : true, typeof review.blind_spots === "string"
missing_evidence: toStringArray(review.missing_evidence), ? review.blind_spots
submitted_at: typeof review.submitted_at === "string" ? review.submitted_at : new Date(0).toISOString(), : "not recorded",
mode: review.mode === "auto" ? "auto" : "manual", accepted:
raw_output: typeof review.raw_output === "string" ? review.raw_output : undefined, typeof review.accepted === "boolean"
rubric: extractRubric(review.rubric), ? review.accepted
}; : (typeof review.evidence_complete === "boolean"
? review.evidence_complete
: true) &&
(typeof review.evidence_convincing === "boolean"
? review.evidence_convincing
: true),
evidence_complete:
typeof review.evidence_complete === "boolean"
? review.evidence_complete
: true,
evidence_convincing:
typeof review.evidence_convincing === "boolean"
? review.evidence_convincing
: true,
missing_evidence: toStringArray(review.missing_evidence),
submitted_at:
typeof review.submitted_at === "string"
? review.submitted_at
: new Date(0).toISOString(),
mode: review.mode === "auto" ? "auto" : "manual",
raw_output:
typeof review.raw_output === "string" ? review.raw_output : undefined,
rubric: extractRubric(review.rubric),
};
} }
function getLegacyRobotReview(task: Task): RobotReviewRecord | undefined { function getLegacyRobotReview(task: Task): RobotReviewRecord | undefined {
const observations = toStringArray(task.metadata?.robot_review_observations); const observations = toStringArray(task.metadata?.robot_review_observations);
if (observations.length === 0) return undefined; if (observations.length === 0) return undefined;
return { return {
iteration: 1, iteration: 1,
reviewer: typeof task.metadata?.robot_review_reviewer === "string" ? task.metadata.robot_review_reviewer : "unknown", reviewer:
scope: typeof task.metadata?.robot_review_scope === "string" ? task.metadata.robot_review_scope : "unknown", typeof task.metadata?.robot_review_reviewer === "string"
observations, ? task.metadata.robot_review_reviewer
concerns: toStringArray(task.metadata?.robot_review_concerns), : "unknown",
suggestions: toStringArray(task.metadata?.robot_review_suggestions), scope:
blind_spots: typeof task.metadata?.robot_review_blind_spots === "string" ? task.metadata.robot_review_blind_spots : "not recorded", typeof task.metadata?.robot_review_scope === "string"
accepted: typeof task.metadata?.robot_review_accepted === "boolean" ? task.metadata.robot_review_scope
? task.metadata.robot_review_accepted : "unknown",
: (typeof task.metadata?.robot_review_evidence_complete === "boolean" ? task.metadata.robot_review_evidence_complete : true) observations,
&& (typeof task.metadata?.robot_review_evidence_convincing === "boolean" ? task.metadata.robot_review_evidence_convincing : true), concerns: toStringArray(task.metadata?.robot_review_concerns),
evidence_complete: typeof task.metadata?.robot_review_evidence_complete === "boolean" ? task.metadata.robot_review_evidence_complete : true, suggestions: toStringArray(task.metadata?.robot_review_suggestions),
evidence_convincing: typeof task.metadata?.robot_review_evidence_convincing === "boolean" ? task.metadata.robot_review_evidence_convincing : true, blind_spots:
missing_evidence: toStringArray(task.metadata?.robot_review_missing_evidence), typeof task.metadata?.robot_review_blind_spots === "string"
submitted_at: typeof task.metadata?.robot_review_submitted_at === "string" ? task.metadata.robot_review_submitted_at : new Date(0).toISOString(), ? task.metadata.robot_review_blind_spots
mode: task.metadata?.robot_review_mode === "auto" ? "auto" : "manual", : "not recorded",
raw_output: typeof task.metadata?.robot_review_raw_output === "string" ? task.metadata.robot_review_raw_output : undefined, accepted:
}; typeof task.metadata?.robot_review_accepted === "boolean"
? task.metadata.robot_review_accepted
: (typeof task.metadata?.robot_review_evidence_complete === "boolean"
? task.metadata.robot_review_evidence_complete
: true) &&
(typeof task.metadata?.robot_review_evidence_convincing === "boolean"
? task.metadata.robot_review_evidence_convincing
: true),
evidence_complete:
typeof task.metadata?.robot_review_evidence_complete === "boolean"
? task.metadata.robot_review_evidence_complete
: true,
evidence_convincing:
typeof task.metadata?.robot_review_evidence_convincing === "boolean"
? task.metadata.robot_review_evidence_convincing
: true,
missing_evidence: toStringArray(
task.metadata?.robot_review_missing_evidence,
),
submitted_at:
typeof task.metadata?.robot_review_submitted_at === "string"
? task.metadata.robot_review_submitted_at
: new Date(0).toISOString(),
mode: task.metadata?.robot_review_mode === "auto" ? "auto" : "manual",
raw_output:
typeof task.metadata?.robot_review_raw_output === "string"
? task.metadata.robot_review_raw_output
: undefined,
};
} }
export function getRobotReviews(task: Task): RobotReviewRecord[] { export function getRobotReviews(task: Task): RobotReviewRecord[] {
const reviews = Array.isArray(task.metadata?.robot_reviews) const reviews = Array.isArray(task.metadata?.robot_reviews)
? task.metadata.robot_reviews ? task.metadata.robot_reviews
.map((review: unknown, index: number) => normalizeReview(review, index)) .map((review: unknown, index: number) => normalizeReview(review, index))
.filter((review): review is RobotReviewRecord => review !== undefined) .filter((review): review is RobotReviewRecord => review !== undefined)
: []; : [];
if (reviews.length > 0) { if (reviews.length > 0) {
return reviews.map((review, index) => ({ ...review, iteration: index + 1 })); return reviews.map((review, index) => ({
} ...review,
const legacy = getLegacyRobotReview(task); iteration: index + 1,
return legacy ? [legacy] : []; }));
}
const legacy = getLegacyRobotReview(task);
return legacy ? [legacy] : [];
} }
export function getLatestRobotReview(task: Task): RobotReviewRecord | undefined { export function getLatestRobotReview(
const reviews = getRobotReviews(task); task: Task,
return reviews.length > 0 ? reviews[reviews.length - 1] : undefined; ): RobotReviewRecord | undefined {
const reviews = getRobotReviews(task);
return reviews.length > 0 ? reviews[reviews.length - 1] : undefined;
} }
function hasNonEmptyString(value: unknown): boolean { function hasNonEmptyString(value: unknown): boolean {
return typeof value === "string" && value.trim().length > 0; return typeof value === "string" && value.trim().length > 0;
} }
export function hasCompleteProofClaim(task: Task): boolean { export function hasCompleteProofClaim(task: Task): boolean {
const metadata = task.metadata ?? {}; const metadata = task.metadata ?? {};
return [ return (
metadata.lgtm_evidence, [
metadata.lgtm_failure_likely, metadata.lgtm_evidence,
metadata.lgtm_failure_sneaky, metadata.lgtm_failure_likely,
metadata.lgtm_failure_unknown, metadata.lgtm_failure_sneaky,
metadata.lgtm_falsification_test, metadata.lgtm_failure_unknown,
metadata.lgtm_evidence_reasoning, metadata.lgtm_falsification_test,
metadata.lgtm_remaining_uncertainty, metadata.lgtm_evidence_reasoning,
].every(hasNonEmptyString) metadata.lgtm_remaining_uncertainty,
&& Array.isArray(metadata.lgtm_verification_hints) ].every(hasNonEmptyString) &&
&& metadata.lgtm_verification_hints.some(hasNonEmptyString); Array.isArray(metadata.lgtm_verification_hints) &&
metadata.lgtm_verification_hints.some(hasNonEmptyString)
);
} }
export function shouldCompleteAfterAcceptedReview(task: Task, reviewAccepted: boolean): boolean { export function shouldCompleteAfterAcceptedReview(
return reviewAccepted && hasCompleteProofClaim(task); task: Task,
reviewAccepted: boolean,
): boolean {
return reviewAccepted && hasCompleteProofClaim(task);
} }
export function relaxAdvisoryVerificationHints(review: Omit<RobotReviewRecord, "iteration">): Omit<RobotReviewRecord, "iteration"> { export function relaxAdvisoryVerificationHints(
const rubric = review.rubric; review: Omit<RobotReviewRecord, "iteration">,
if (!rubric || review.evidence_complete !== true) return review; ): Omit<RobotReviewRecord, "iteration"> {
const requiredCoreKeys = ["evidence_covers_done_criterion", "falsification_test_runnable", "failure_modes_addressed", "evidence_distinguishes_success"]; const rubric = review.rubric;
if (!requiredCoreKeys.every((key) => rubric[key]?.pass === true)) return review; if (!rubric || review.evidence_complete !== true) return review;
const failedKeys = Object.entries(rubric) const requiredCoreKeys = [
.filter(([, item]) => item.pass !== true) "evidence_covers_done_criterion",
.map(([key]) => key); "falsification_test_runnable",
if (failedKeys.length !== 1 || failedKeys[0] !== "verification_hints_actionable") return review; ];
return { if (!requiredCoreKeys.every((key) => rubric[key]?.pass === true))
...review, return review;
accepted: true, const failedKeys = Object.entries(rubric)
evidence_convincing: true, .filter(([, item]) => item.pass !== true)
observations: [ .map(([key]) => key);
...review.observations, const advisoryKeys = [
"Verification hints were weak, but treated as advisory because the verbatim evidence already covered the done criterion.", "failure_modes_addressed",
], "evidence_distinguishes_success",
concerns: review.concerns, "verification_hints_actionable",
suggestions: review.suggestions, ];
missing_evidence: review.missing_evidence.filter((item) => item !== "verification_hints_actionable" && !/verification hint/i.test(item)), if (
}; failedKeys.length === 0 ||
!failedKeys.every((key) => advisoryKeys.includes(key))
)
return review;
const advisoryNotes: string[] = [];
if (failedKeys.includes("failure_modes_addressed")) {
advisoryNotes.push(
"Failure-mode writeup was weak, but treated as advisory because the verbatim evidence already covered the done criterion.",
);
}
if (failedKeys.includes("evidence_distinguishes_success")) {
advisoryNotes.push(
"Why-this-proves-it reasoning was weak, but treated as advisory because the packet already contained direct success evidence.",
);
}
if (failedKeys.includes("verification_hints_actionable")) {
advisoryNotes.push(
"Verification hints were weak, but treated as advisory because the verbatim evidence already covered the done criterion.",
);
}
return {
...review,
accepted: true,
evidence_convincing: true,
observations: [...review.observations, ...advisoryNotes],
concerns: review.concerns,
suggestions: review.suggestions,
missing_evidence: review.missing_evidence.filter(
(item) =>
!advisoryKeys.includes(item) &&
!/verification hint/i.test(item) &&
!/failure[- ]?mode/i.test(item) &&
!/distinguish/i.test(item),
),
};
} }
export function appendRobotReviewMetadata(task: Task, review: Omit<RobotReviewRecord, "iteration">): Record<string, unknown> { export function appendRobotReviewMetadata(
const robot_reviews = [...getRobotReviews(task), { ...review, iteration: 0 }].map((entry, index) => ({ task: Task,
...entry, review: Omit<RobotReviewRecord, "iteration">,
accepted: entry.accepted, ): Record<string, unknown> {
iteration: index + 1, const robot_reviews = [
})); ...getRobotReviews(task),
const latest = robot_reviews[robot_reviews.length - 1]; { ...review, iteration: 0 },
return { ].map((entry, index) => ({
robot_reviews, ...entry,
robot_review_reviewer: latest.reviewer, accepted: entry.accepted,
robot_review_scope: latest.scope, iteration: index + 1,
robot_review_observations: latest.observations, }));
robot_review_concerns: latest.concerns, const latest = robot_reviews[robot_reviews.length - 1];
robot_review_suggestions: latest.suggestions, return {
robot_review_blind_spots: latest.blind_spots, robot_reviews,
robot_review_accepted: latest.accepted, robot_review_reviewer: latest.reviewer,
robot_review_evidence_complete: latest.evidence_complete, robot_review_scope: latest.scope,
robot_review_evidence_convincing: latest.evidence_convincing, robot_review_observations: latest.observations,
robot_review_missing_evidence: latest.missing_evidence, robot_review_concerns: latest.concerns,
robot_review_submitted_at: latest.submitted_at, robot_review_suggestions: latest.suggestions,
robot_review_mode: latest.mode, robot_review_blind_spots: latest.blind_spots,
robot_review_raw_output: latest.raw_output ?? null, robot_review_accepted: latest.accepted,
robot_review_requires_followup: !(latest.evidence_complete && latest.evidence_convincing), robot_review_evidence_complete: latest.evidence_complete,
robot_review_iteration_count: robot_reviews.length, robot_review_evidence_convincing: latest.evidence_convincing,
}; robot_review_missing_evidence: latest.missing_evidence,
robot_review_submitted_at: latest.submitted_at,
robot_review_mode: latest.mode,
robot_review_raw_output: latest.raw_output ?? null,
robot_review_requires_followup: !(
latest.evidence_complete && latest.evidence_convincing
),
robot_review_iteration_count: robot_reviews.length,
};
} }
/**
 * Report whether the most recent robot review on `task` accepted the claim.
 *
 * @param task - Task whose review history is inspected.
 * @returns The latest review's `accepted` flag, or `false` when the task has no review yet.
 */
export function latestRobotReviewPasses(task: Task): boolean {
  const mostRecent = getLatestRobotReview(task);
  if (!mostRecent) {
    return false;
  }
  return mostRecent.accepted;
}
+297 -204
View File
@@ -2,248 +2,341 @@
* task-store.ts — File-backed task store with CRUD, dependency management, and file locking. * task-store.ts — File-backed task store with CRUD, dependency management, and file locking.
* *
* Session-scoped (default): in-memory Map — no disk I/O. * Session-scoped (default): in-memory Map — no disk I/O.
* Shared (PI_TASK_LIST_ID set): ~/.pi/tasks/<listId>.json with file locking. * Named or project stores live under <cwd>/.pi/tasks/ unless an absolute path is given.
*/ */
import { existsSync, mkdirSync, readFileSync, renameSync, unlinkSync, writeFileSync } from "node:fs"; import {
import { homedir } from "node:os"; existsSync,
mkdirSync,
readFileSync,
renameSync,
unlinkSync,
writeFileSync,
} from "node:fs";
import { dirname, isAbsolute, join } from "node:path"; import { dirname, isAbsolute, join } from "node:path";
import type { Task, TaskStatus, TaskStoreData } from "./types.js"; import type { Task, TaskStatus, TaskStoreData } from "./types.js";
/** Directory for named/project task lists; resolved against the working directory at module load. */
const TASKS_DIR = join(process.cwd(), ".pi", "tasks");
/** Pause between two lock-acquisition attempts, in milliseconds. */
const LOCK_RETRY_MS = 50;
/** Number of attempts before giving up on the lock (100 x 50 ms = 5 s max). */
const LOCK_MAX_RETRIES = 100;
/**
 * Acquire an exclusive lock by atomically creating `lockPath` (flag "wx") containing this
 * process's PID.
 *
 * If the lock file already exists, the PID stored in it is checked: a lock whose owner is no
 * longer running is removed and acquisition is retried immediately. Otherwise the call sleeps
 * for LOCK_RETRY_MS and tries again, up to LOCK_MAX_RETRIES attempts.
 *
 * @param lockPath - Path of the lock file to create.
 * @throws Error when the lock could not be acquired within the retry budget.
 * @throws Any filesystem error other than EEXIST raised while creating the lock file.
 */
function acquireLock(lockPath: string): void {
  for (let attempt = 0; attempt < LOCK_MAX_RETRIES; attempt++) {
    try {
      writeFileSync(lockPath, `${process.pid}`, { flag: "wx" });
      return;
    } catch (e: unknown) {
      const code =
        typeof e === "object" && e !== null && "code" in e
          ? (e as { code?: unknown }).code
          : undefined;
      // Anything but "lock already held" is a real failure (permissions, missing dir, ...).
      if (code !== "EEXIST") throw e;
    }

    // Lock is held: reclaim it if the recorded owner has died.
    try {
      const ownerPid = parseInt(readFileSync(lockPath, "utf-8"), 10);
      if (ownerPid && !isProcessRunning(ownerPid)) {
        unlinkSync(lockPath);
        continue;
      }
    } catch {
      /* lock vanished or is unreadable — fall through and retry after a pause */
    }

    // Synchronous sleep that blocks the thread without spinning the CPU.
    Atomics.wait(new Int32Array(new SharedArrayBuffer(4)), 0, 0, LOCK_RETRY_MS);
  }
  throw new Error(`Failed to acquire lock: ${lockPath}`);
}
/**
 * Release a lock by removing its lock file.
 *
 * Best effort: any failure to remove the file (for example, it is already gone) is ignored.
 *
 * @param lockPath - Path of the lock file to remove.
 */
function releaseLock(lockPath: string): void {
  try {
    unlinkSync(lockPath);
  } catch {
    // Deliberately swallowed — releasing must never throw.
  }
}
/**
 * Report whether a process with the given PID currently exists.
 *
 * Uses `process.kill(pid, 0)`: signal 0 performs the existence/permission check without
 * delivering a signal. An EPERM failure means the process exists but this process may not
 * signal it, so it still counts as running; any other failure is treated as "not running".
 *
 * @param pid - Process ID to probe.
 * @returns `true` when the process exists, `false` otherwise.
 */
function isProcessRunning(pid: number): boolean {
  try {
    process.kill(pid, 0);
    return true;
  } catch (e: unknown) {
    const code =
      typeof e === "object" && e !== null && "code" in e
        ? (e as { code?: unknown }).code
        : undefined;
    return code === "EPERM";
  }
}
/**
 * Task store with CRUD and dependency bookkeeping.
 *
 * Constructed without an argument it is purely in-memory. Constructed with a list id or an
 * absolute path it is backed by a JSON file: every mutation runs under a file lock, reloads
 * the file first, and writes the result back atomically (temp file + rename).
 */
export class TaskStore {
  private filePath: string | undefined;
  private lockPath: string | undefined;
  private nextId = 1;
  private tasks = new Map<string, Task>();

  /**
   * @param listIdOrPath - Omit for an in-memory store. An absolute path is used as-is;
   *   any other value is treated as a list id and stored as `<TASKS_DIR>/<id>.json`.
   */
  constructor(listIdOrPath?: string) {
    if (!listIdOrPath) return;
    const filePath = isAbsolute(listIdOrPath)
      ? listIdOrPath
      : join(TASKS_DIR, `${listIdOrPath}.json`);
    mkdirSync(dirname(filePath), { recursive: true });
    this.filePath = filePath;
    this.lockPath = filePath + ".lock";
    this.load();
  }

  /**
   * Refresh in-memory state from disk. A missing, unparsable, or malformed file leaves the
   * current in-memory state untouched; state is only replaced once the data is validated.
   */
  private load(): void {
    if (!this.filePath || !existsSync(this.filePath)) return;
    let parsed: unknown;
    try {
      parsed = JSON.parse(readFileSync(this.filePath, "utf-8"));
    } catch {
      /* corrupt file — keep current state */
      return;
    }
    if (typeof parsed !== "object" || parsed === null) return;
    const { nextId, tasks } = parsed as Partial<TaskStoreData>;
    if (typeof nextId !== "number" || !Array.isArray(tasks)) return;
    this.nextId = nextId;
    this.tasks.clear();
    for (const t of tasks) this.tasks.set(t.id, t);
  }

  /** Persist the current state atomically: write a temp file, then rename it over the target. */
  private save(): void {
    if (!this.filePath) return;
    const tmpPath = this.filePath + ".tmp";
    writeFileSync(
      tmpPath,
      JSON.stringify(
        { nextId: this.nextId, tasks: Array.from(this.tasks.values()) },
        null,
        2,
      ),
    );
    renameSync(tmpPath, this.filePath);
  }

  /**
   * Run `fn` as a read-modify-write transaction. For file-backed stores this takes the lock,
   * reloads, runs `fn`, saves, and always releases the lock. If `fn` throws nothing is saved.
   */
  private withLock<T>(fn: () => T): T {
    if (!this.lockPath) return fn();
    acquireLock(this.lockPath);
    try {
      this.load();
      const result = fn();
      this.save();
      return result;
    } finally {
      releaseLock(this.lockPath);
    }
  }

  /** Remove a task and strip its id from every other task's `blocks` / `blockedBy` lists. */
  private removeTask(id: string): void {
    this.tasks.delete(id);
    for (const t of this.tasks.values()) {
      t.blocks = t.blocks.filter((bid) => bid !== id);
      t.blockedBy = t.blockedBy.filter((bid) => bid !== id);
    }
  }

  /**
   * Create a pending task.
   *
   * @param parentId - When given, the new task is a subtask of that task.
   * @returns The newly created task.
   * @throws Error when `parentId` is given but no such task exists.
   */
  create(
    subject: string,
    description: string,
    done_criterion: string,
    progress_label?: string,
    metadata?: Record<string, any>,
    parentId?: string,
  ): Task {
    return this.withLock(() => {
      if (parentId && !this.tasks.has(parentId))
        throw new Error(`Parent task #${parentId} not found`);
      const now = Date.now();
      const task: Task = {
        id: String(this.nextId++),
        subject,
        description,
        done_criterion,
        parentId,
        status: "pending",
        progress_label,
        metadata: metadata ?? {},
        blocks: [],
        blockedBy: [],
        createdAt: now,
        updatedAt: now,
      };
      this.tasks.set(task.id, task);
      return task;
    });
  }

  /** Look up a task by id (file-backed stores reload from disk first). */
  get(id: string): Task | undefined {
    if (this.filePath) this.load();
    return this.tasks.get(id);
  }

  /** All tasks ordered by numeric id (file-backed stores reload from disk first). */
  list(): Task[] {
    if (this.filePath) this.load();
    return Array.from(this.tasks.values()).sort(
      (a, b) => Number(a.id) - Number(b.id),
    );
  }

  /**
   * Apply a partial update to a task.
   *
   * - `status: "deleted"` removes the task and all dependency links to it.
   * - `metadata` is merged key by key; a `null` value deletes that key.
   * - `add_blocks` / `add_blocked_by` add dependency edges on both sides and report
   *   self-blocks, unknown ids, and two-way cycles as warnings (the edge is still recorded).
   *
   * @returns The updated task (or `undefined` when missing/deleted), the names of the fields
   *   that changed, and any dependency warnings.
   * @throws Error when completing a top-level task directly, or when `parentId` is supplied.
   *   Both checks run before any field is modified, so a rejected update changes nothing.
   */
  update(
    id: string,
    fields: {
      status?: TaskStatus | "deleted";
      subject?: string;
      description?: string;
      done_criterion?: string;
      progress_label?: string;
      metadata?: Record<string, any>;
      parentId?: string | null;
      add_blocks?: string[];
      add_blocked_by?: string[];
    },
  ): { task: Task | undefined; changedFields: string[]; warnings: string[] } {
    return this.withLock(() => {
      const task = this.tasks.get(id);
      if (!task) return { task: undefined, changedFields: [], warnings: [] };

      const changedFields: string[] = [];
      const warnings: string[] = [];

      // Subtasks are normal checklist items. Top-level tasks are goals and need a proof
      // claim plus automatic review; TaskClaimDone is the only agent path that completes them.
      if (fields.status === "completed" && !task.parentId) {
        throw new Error(
          `Top-level task #${id} requires proof. Use TaskClaimDone with evidence and failure modes; subtasks can be completed directly.`,
        );
      }

      if (fields.status === "deleted") {
        this.removeTask(id);
        return { task: undefined, changedFields: ["deleted"], warnings: [] };
      }

      // Validate before mutating: the in-memory store has no reload to undo a half-applied update.
      if (fields.parentId !== undefined) {
        throw new Error(
          "parentId is creation-only. Create subtasks with TaskCreate(parentId); do not downgrade top-level proof goals.",
        );
      }

      if (fields.status !== undefined) {
        task.status = fields.status as TaskStatus;
        changedFields.push("status");
      }
      if (fields.subject !== undefined) {
        task.subject = fields.subject;
        changedFields.push("subject");
      }
      if (fields.description !== undefined) {
        task.description = fields.description;
        changedFields.push("description");
      }
      if (fields.done_criterion !== undefined) {
        task.done_criterion = fields.done_criterion;
        changedFields.push("done_criterion");
      }
      if (fields.progress_label !== undefined) {
        task.progress_label = fields.progress_label;
        changedFields.push("progress_label");
      }

      if (fields.metadata !== undefined) {
        for (const [key, value] of Object.entries(fields.metadata)) {
          if (value === null) delete task.metadata[key];
          else task.metadata[key] = value;
        }
        changedFields.push("metadata");
      }

      if (fields.add_blocks?.length) {
        for (const targetId of fields.add_blocks) {
          if (!task.blocks.includes(targetId)) task.blocks.push(targetId);
          const target = this.tasks.get(targetId);
          if (target && !target.blockedBy.includes(id)) {
            target.blockedBy.push(id);
            target.updatedAt = Date.now();
          }
          if (targetId === id) warnings.push(`#${id} blocks itself`);
          else if (!target) warnings.push(`#${targetId} does not exist`);
          else if (target.blocks.includes(id))
            warnings.push(`cycle: #${id} and #${targetId} block each other`);
        }
        changedFields.push("blocks");
      }

      if (fields.add_blocked_by?.length) {
        for (const targetId of fields.add_blocked_by) {
          if (!task.blockedBy.includes(targetId)) task.blockedBy.push(targetId);
          const target = this.tasks.get(targetId);
          if (target && !target.blocks.includes(id)) {
            target.blocks.push(id);
            target.updatedAt = Date.now();
          }
          if (targetId === id) warnings.push(`#${id} blocks itself`);
          else if (!target) warnings.push(`#${targetId} does not exist`);
          else if (task.blocks.includes(targetId))
            warnings.push(`cycle: #${id} and #${targetId} block each other`);
        }
        changedFields.push("blockedBy");
      }

      task.updatedAt = Date.now();
      return { task, changedFields, warnings };
    });
  }

  /**
   * Complete a task. Called by accepted proof review or direct subtask completion paths.
   *
   * @throws Error when the task does not exist or is already completed.
   */
  complete(id: string): Task {
    return this.withLock(() => {
      const task = this.tasks.get(id);
      if (!task) throw new Error(`Task #${id} not found`);
      if (task.status === "completed")
        throw new Error(`Task #${id} already completed`);
      task.status = "completed";
      task.updatedAt = Date.now();
      return task;
    });
  }

  /** Delete a task and its dependency links. Returns `false` when the id is unknown. */
  delete(id: string): boolean {
    return this.withLock(() => {
      if (!this.tasks.has(id)) return false;
      this.removeTask(id);
      return true;
    });
  }

  /** Remove every task. Returns how many were removed. */
  clearAll(): number {
    return this.withLock(() => {
      const count = this.tasks.size;
      this.tasks.clear();
      return count;
    });
  }

  /**
   * Delete the backing file when the store holds no tasks.
   *
   * Reloads first so tasks written by another process are not discarded on a stale view.
   *
   * @returns `true` when the store is file-backed and empty (removal is best effort),
   *   `false` for in-memory stores or when tasks remain.
   */
  deleteFileIfEmpty(): boolean {
    if (!this.filePath) return false;
    this.load();
    if (this.tasks.size > 0) return false;
    try {
      unlinkSync(this.filePath);
    } catch {
      /* ignore */
    }
    return true;
  }

  /**
   * Remove all completed tasks and drop dependency links that point at removed tasks.
   *
   * @returns How many tasks were removed.
   */
  clearCompleted(): number {
    return this.withLock(() => {
      let count = 0;
      for (const [id, task] of this.tasks) {
        if (task.status === "completed") {
          this.tasks.delete(id);
          count++;
        }
      }
      if (count > 0) {
        const validIds = new Set(this.tasks.keys());
        for (const t of this.tasks.values()) {
          t.blocks = t.blocks.filter((bid) => validIds.has(bid));
          t.blockedBy = t.blockedBy.filter((bid) => validIds.has(bid));
        }
      }
      return count;
    });
  }
}
+10 -8
View File
@@ -4,20 +4,22 @@ import { mkdirSync, readFileSync, writeFileSync } from "node:fs";
import { dirname, join } from "node:path"; import { dirname, join } from "node:path";
/** Persisted settings for the tasks extension; every field is optional. */
export interface TasksConfig {
  /** Where the task list lives. Default: "session". */
  taskScope?: "memory" | "session" | "project";
  /** Default: false. */
  autoCascade?: boolean;
  /** When completed tasks are cleared automatically. Default: "never". */
  autoClearCompleted?: "never" | "on_list_complete" | "on_task_complete";
}
/** Location of the repo-local config file, resolved against the working directory at module load. */
const CONFIG_PATH = join(process.cwd(), ".pi", "tasks-config.json");
/**
 * Load the tasks configuration from CONFIG_PATH.
 *
 * @returns The parsed configuration object, or `{}` when the file is missing, unreadable,
 *   not valid JSON, or does not contain a JSON object (e.g. `null`, an array, a number).
 */
export function loadTasksConfig(): TasksConfig {
  let parsed: unknown;
  try {
    parsed = JSON.parse(readFileSync(CONFIG_PATH, "utf-8"));
  } catch {
    return {};
  }
  // Valid JSON is not necessarily a config object; callers read properties off the result.
  if (typeof parsed !== "object" || parsed === null || Array.isArray(parsed)) {
    return {};
  }
  return parsed as TasksConfig;
}
/**
 * Write the tasks configuration to CONFIG_PATH as pretty-printed JSON, creating the
 * containing directory first when needed.
 *
 * @param config - Configuration to persist.
 */
export function saveTasksConfig(config: TasksConfig): void {
  const configDir = dirname(CONFIG_PATH);
  mkdirSync(configDir, { recursive: true });
  const serialized = JSON.stringify(config, null, 2);
  writeFileSync(CONFIG_PATH, serialized);
}
+14 -14
View File
@@ -5,22 +5,22 @@
/** Lifecycle states a stored task can be in. */
export type TaskStatus = "pending" | "in_progress" | "completed";
/** A single task record as held in the store and serialized to disk. */
export interface Task {
  id: string;
  subject: string;
  description: string;
  /** Required: what "done" looks like. */
  done_criterion: string;
  /** No parent = top-level goal, which requires a proof claim to complete. */
  parentId?: string;
  status: TaskStatus;
  progress_label?: string;
  metadata: Record<string, any>;
  /** Ids of tasks this task blocks. */
  blocks: string[];
  /** Ids of tasks that block this task. */
  blockedBy: string[];
  createdAt: number;
  updatedAt: number;
}
/** Shape of the JSON document a file-backed store keeps on disk. */
export interface TaskStoreData {
  /** Numeric id to assign to the next created task. */
  nextId: number;
  /** Every task currently in the store. */
  tasks: Task[];
}
+229 -195
View File
@@ -1,11 +1,11 @@
/** /**
* task-widget.ts — Persistent widget showing task list with status icons and progress. * task-widget.ts — Persistent widget showing open goals with simple status icons and progress.
* *
* Display style matches Claude Code's task list: * Display style:
* ✔ completed tasks (strikethrough + dim)
* ◼ in_progress tasks * ◼ in_progress tasks
* ◻ pending tasks * ◻ pending tasks
* ✳/✽ actively executing task (star spinner with progress_label text) * ✳/✽ actively executing task (star spinner with progress_label text)
* Completed tasks stay in storage but are hidden from the collapsed widget.
*/ */
import { truncateToWidth } from "@mariozechner/pi-tui"; import { truncateToWidth } from "@mariozechner/pi-tui";
@@ -15,18 +15,23 @@ import type { TaskStore } from "../task-store.js";
// ---- Types ---- // ---- Types ----
/** Text-styling helpers the widget needs from the host theme; each returns the styled string. */
export type Theme = {
  /** Style `text` with a named theme color (the widget uses names such as "accent" and "dim"). */
  fg(color: string, text: string): string;
  bold(text: string): string;
  strikethrough(text: string): string;
};
/** Subset of the host UI context that the task widget calls into. */
export type UICtx = {
  setStatus(key: string, text: string | undefined): void;
  /**
   * Register a widget factory under `key`; passing `undefined` as `content` removes the widget.
   */
  setWidget(
    key: string,
    content:
      | undefined
      | ((
          tui: any,
          theme: Theme,
        ) => { render(): string[]; invalidate(): void }),
    options?: { placement?: "aboveEditor" | "belowEditor" },
  ): void;
};
/** Star spinner frames for animated active task indicator (matches Claude Code). */ /** Star spinner frames for animated active task indicator (matches Claude Code). */
@@ -36,225 +41,254 @@ const MAX_VISIBLE_TASKS = 5;
/** Runtime metrics tracked for one task while it is shown as active. */
export interface TaskMetrics {
  /** Epoch milliseconds (`Date.now()`) at which tracking for the task began. */
  startedAt: number;
  /** Input tokens accumulated so far. */
  inputTokens: number;
  /** Output tokens accumulated so far. */
  outputTokens: number;
}
/**
 * Render a millisecond duration using its two most significant units.
 *
 * Under a minute: "42s". Under an hour: "2m 49s" (or "2m" on an exact minute).
 * Otherwise: "1h 3m" (or "1h" on an exact hour; seconds are dropped).
 */
function formatDuration(ms: number): string {
  const seconds = Math.floor(ms / 1000);
  if (seconds < 60) {
    return `${seconds}s`;
  }

  const minutes = Math.floor(seconds / 60);
  if (minutes < 60) {
    const leftoverSec = seconds % 60;
    return leftoverSec > 0 ? `${minutes}m ${leftoverSec}s` : `${minutes}m`;
  }

  const hours = Math.floor(minutes / 60);
  const leftoverMin = minutes % 60;
  return leftoverMin > 0 ? `${hours}h ${leftoverMin}m` : `${hours}h`;
}
/**
 * Render a token count compactly: values below 1000 are printed as-is ("850"); larger values
 * are shown in thousands with at most one decimal and a "k" suffix ("4.1k", "12k").
 */
function formatTokens(n: number): string {
  if (n < 1000) {
    return String(n);
  }
  const thousands = (n / 1000).toFixed(1);
  // Drop a trailing ".0" so round values read "12k" rather than "12.0k".
  const trimmed = thousands.endsWith(".0") ? thousands.slice(0, -2) : thousands;
  return trimmed + "k";
}
// ---- Widget ---- // ---- Widget ----
export class TaskWidget { export class TaskWidget {
private uiCtx: UICtx | undefined; private uiCtx: UICtx | undefined;
private widgetFrame = 0; private widgetFrame = 0;
private widgetInterval: ReturnType<typeof setInterval> | undefined; private widgetInterval: ReturnType<typeof setInterval> | undefined;
/** IDs of tasks currently being actively executed (show spinner). */ /** IDs of tasks currently being actively executed (show spinner). */
private activeTaskIds = new Set<string>(); private activeTaskIds = new Set<string>();
/** Per-task runtime metrics keyed by task ID. */ /** Per-task runtime metrics keyed by task ID. */
private metrics = new Map<string, TaskMetrics>(); private metrics = new Map<string, TaskMetrics>();
/** Cached TUI instance for requestRender() calls. */ /** Cached TUI instance for requestRender() calls. */
private tui: any | undefined; private tui: any | undefined;
/** Whether the widget callback is currently registered. */ /** Whether the widget callback is currently registered. */
private widgetRegistered = false; private widgetRegistered = false;
constructor(private store: TaskStore) {} constructor(private store: TaskStore) {}
setStore(store: TaskStore) { setStore(store: TaskStore) {
this.store = store; this.store = store;
} }
setUICtx(ctx: UICtx) { setUICtx(ctx: UICtx) {
this.uiCtx = ctx; this.uiCtx = ctx;
} }
/** Add or remove a task from the active spinner set. */ /** Add or remove a task from the active spinner set. */
setActiveTask(taskId: string | undefined, active = true) { setActiveTask(taskId: string | undefined, active = true) {
if (taskId && active) { if (taskId && active) {
this.activeTaskIds.add(taskId); this.activeTaskIds.add(taskId);
if (!this.metrics.has(taskId)) { if (!this.metrics.has(taskId)) {
this.metrics.set(taskId, { startedAt: Date.now(), inputTokens: 0, outputTokens: 0 }); this.metrics.set(taskId, {
} startedAt: Date.now(),
this.ensureTimer(); inputTokens: 0,
} else if (taskId) { outputTokens: 0,
this.activeTaskIds.delete(taskId); });
} }
this.update(); this.ensureTimer();
} } else if (taskId) {
this.activeTaskIds.delete(taskId);
}
this.update();
}
/** Record token usage for the currently active task(s). */ /** Record token usage for the currently active task(s). */
addTokenUsage(inputTokens: number, outputTokens: number) { addTokenUsage(inputTokens: number, outputTokens: number) {
// Distribute to all currently active tasks // Distribute to all currently active tasks
for (const id of this.activeTaskIds) { for (const id of this.activeTaskIds) {
const m = this.metrics.get(id); const m = this.metrics.get(id);
if (m) { if (m) {
m.inputTokens += inputTokens; m.inputTokens += inputTokens;
m.outputTokens += outputTokens; m.outputTokens += outputTokens;
} }
} }
} }
/** Ensure the widget update timer is running. */ /** Ensure the widget update timer is running. */
ensureTimer() { ensureTimer() {
if (!this.widgetInterval) { if (!this.widgetInterval) {
this.widgetInterval = setInterval(() => this.update(), 80); this.widgetInterval = setInterval(() => this.update(), 80);
} }
} }
/** Build widget lines from current live state. Called from the render callback. */ /** Build widget lines from current live state. Called from the render callback. */
private renderWidget(tui: any, theme: Theme): string[] { private renderWidget(tui: any, theme: Theme): string[] {
const tasks = this.store.list(); const tasks = this.store.list();
const w = tui.terminal.columns; const w = tui.terminal.columns;
const truncate = (line: string) => truncateToWidth(line, w); const truncate = (line: string) => truncateToWidth(line, w);
if (tasks.length === 0) return []; if (tasks.length === 0) return [];
const counts = { completed: 0, in_progress: 0, pending: 0 }; const counts = { completed: 0, in_progress: 0, pending: 0 };
for (const t of tasks) counts[getDisplayStatus(t)]++; for (const t of tasks) counts[getDisplayStatus(t)]++;
const parts: string[] = []; const visibleTasks = tasks.filter((task) => task.status !== "completed");
if (counts.completed > 0) parts.push(`${counts.completed} done`); if (visibleTasks.length === 0) return [];
if (counts.in_progress > 0) parts.push(`${counts.in_progress} in progress`);
if (counts.pending > 0) parts.push(`${counts.pending} open`);
const statusText = `${tasks.length} tasks (${parts.join(", ")})`;
const spinnerChar = SPINNER[this.widgetFrame % SPINNER.length]; const parts: string[] = [];
const lines: string[] = [truncate(theme.fg("accent", "●") + " " + theme.fg("accent", statusText))]; if (counts.completed > 0) parts.push(`${counts.completed} done hidden`);
if (counts.in_progress > 0) parts.push(`${counts.in_progress} in progress`);
if (counts.pending > 0) parts.push(`${counts.pending} open`);
const statusText = `${tasks.length} goals (${parts.join(", ")})`;
const visible = tasks.slice(0, MAX_VISIBLE_TASKS); const spinnerChar = SPINNER[this.widgetFrame % SPINNER.length];
for (let i = 0; i < visible.length; i++) { const lines: string[] = [
const task = visible[i]; truncate(theme.fg("accent", "●") + " " + theme.fg("accent", statusText)),
const isActive = this.activeTaskIds.has(task.id) && task.status === "in_progress"; ];
let icon: string; const visible = visibleTasks.slice(0, MAX_VISIBLE_TASKS);
if (isActive) { for (let i = 0; i < visible.length; i++) {
icon = theme.fg("accent", spinnerChar); const task = visible[i];
} else if (task.status === "completed") { const isActive =
icon = theme.fg("success", "✔"); this.activeTaskIds.has(task.id) && task.status === "in_progress";
} else if (task.status === "in_progress") {
icon = theme.fg("accent", "◼");
} else {
icon = "◻";
}
let suffix = ""; let icon: string;
if (task.status === "pending" && task.blockedBy.length > 0) { if (isActive) {
const openBlockers = task.blockedBy.filter(bid => { icon = theme.fg("accent", spinnerChar);
const blocker = this.store.get(bid); } else if (task.status === "in_progress") {
return blocker && blocker.status !== "completed"; icon = theme.fg("accent", "◼");
}); } else {
if (openBlockers.length > 0) { icon = "◻";
suffix = theme.fg("dim", ` blocked by ${openBlockers.map(id => "#" + id).join(", ")}`); }
}
}
let text: string; let suffix = "";
if (isActive) { if (task.status === "pending" && task.blockedBy.length > 0) {
const form = task.progress_label || task.subject; const openBlockers = task.blockedBy.filter((bid) => {
const m = this.metrics.get(task.id); const blocker = this.store.get(bid);
let stats = ""; return blocker && blocker.status !== "completed";
if (m) { });
const elapsed = formatDuration(Date.now() - m.startedAt); if (openBlockers.length > 0) {
const tokenParts: string[] = []; suffix = theme.fg(
if (m.inputTokens > 0) tokenParts.push(`${formatTokens(m.inputTokens)}`); "dim",
if (m.outputTokens > 0) tokenParts.push(`${formatTokens(m.outputTokens)}`); ` blocked by ${openBlockers.map((id) => "#" + id).join(", ")}`,
stats = tokenParts.length > 0 );
? ` ${theme.fg("dim", `(${elapsed} · ${tokenParts.join(" ")})`)}` }
: ` ${theme.fg("dim", `(${elapsed})`)}`; }
}
text = ` ${icon} ${theme.fg("dim", "#" + task.id)} ${theme.fg("accent", form + "…")}${stats}`;
} else if (task.status === "completed") {
text = ` ${icon} ${theme.fg("dim", theme.strikethrough("#" + task.id + " " + task.subject))}`;
} else {
text = ` ${icon} ${theme.fg("dim", "#" + task.id)} ${task.subject}`;
}
lines.push(truncate(text + suffix)); let text: string;
} if (isActive) {
const form = task.progress_label || task.subject;
const m = this.metrics.get(task.id);
let stats = "";
if (m) {
const elapsed = formatDuration(Date.now() - m.startedAt);
const tokenParts: string[] = [];
if (m.inputTokens > 0)
tokenParts.push(`${formatTokens(m.inputTokens)}`);
if (m.outputTokens > 0)
tokenParts.push(`${formatTokens(m.outputTokens)}`);
stats =
tokenParts.length > 0
? ` ${theme.fg("dim", `(${elapsed}, ${tokenParts.join(" ")})`)}`
: ` ${theme.fg("dim", `(${elapsed})`)}`;
}
text = ` ${icon} ${theme.fg("dim", "#" + task.id)} ${theme.fg("accent", form + "…")}${stats}`;
} else {
text = ` ${icon} ${theme.fg("dim", "#" + task.id)} ${task.subject}`;
}
if (tasks.length > MAX_VISIBLE_TASKS) { lines.push(truncate(text + suffix));
lines.push(truncate(theme.fg("dim", ` … and ${tasks.length - MAX_VISIBLE_TASKS} more`))); }
}
return lines; if (visibleTasks.length > MAX_VISIBLE_TASKS) {
} lines.push(
truncate(
theme.fg(
"dim",
` … and ${visibleTasks.length - MAX_VISIBLE_TASKS} more open`,
),
),
);
}
/** Force an immediate widget update. */ return lines;
update() { }
if (!this.uiCtx) return;
const tasks = this.store.list();
// Transition: visible → hidden /** Force an immediate widget update. */
if (tasks.length === 0) { update() {
if (this.widgetRegistered) { if (!this.uiCtx) return;
this.uiCtx.setWidget("tasks", undefined); const tasks = this.store.list();
this.widgetRegistered = false; const visibleTasks = tasks.filter((task) => task.status !== "completed");
}
if (this.widgetInterval) {
clearInterval(this.widgetInterval);
this.widgetInterval = undefined;
}
return;
}
// Prune stale active IDs (deleted or no longer in_progress) // Transition: visible → hidden
for (const id of this.activeTaskIds) { if (visibleTasks.length === 0) {
const t = this.store.get(id); if (this.widgetRegistered) {
if (!t || t.status !== "in_progress") { this.uiCtx.setWidget("tasks", undefined);
this.activeTaskIds.delete(id); this.widgetRegistered = false;
this.metrics.delete(id); }
} if (this.widgetInterval) {
} clearInterval(this.widgetInterval);
this.widgetInterval = undefined;
}
return;
}
// Check if any task needs animation // Prune stale active IDs (deleted or no longer in_progress)
const hasActiveSpinner = tasks.some(t => this.activeTaskIds.has(t.id) && t.status === "in_progress"); for (const id of this.activeTaskIds) {
if (hasActiveSpinner) { const t = this.store.get(id);
this.ensureTimer(); if (!t || t.status !== "in_progress") {
} else if (!hasActiveSpinner && this.widgetInterval) { this.activeTaskIds.delete(id);
clearInterval(this.widgetInterval); this.metrics.delete(id);
this.widgetInterval = undefined; }
} }
this.widgetFrame++; // Check if any task needs animation
const hasActiveSpinner = tasks.some(
(t) => this.activeTaskIds.has(t.id) && t.status === "in_progress",
);
if (hasActiveSpinner) {
this.ensureTimer();
} else if (!hasActiveSpinner && this.widgetInterval) {
clearInterval(this.widgetInterval);
this.widgetInterval = undefined;
}
// Transition: hidden → visible — register widget callback once this.widgetFrame++;
if (!this.widgetRegistered) {
this.uiCtx.setWidget("tasks", (tui, theme) => {
this.tui = tui;
return { render: () => this.renderWidget(tui, theme), invalidate: () => {} };
}, { placement: "aboveEditor" });
this.widgetRegistered = true;
} else if (this.tui) {
// Widget already registered — just request a re-render
this.tui.requestRender();
}
}
dispose() { // Transition: hidden → visible — register widget callback once
if (this.widgetInterval) { if (!this.widgetRegistered) {
clearInterval(this.widgetInterval); this.uiCtx.setWidget(
this.widgetInterval = undefined; "tasks",
} (tui, theme) => {
if (this.uiCtx) { this.tui = tui;
this.uiCtx.setWidget("tasks", undefined); return {
} render: () => this.renderWidget(tui, theme),
this.widgetRegistered = false; invalidate: () => {},
this.tui = undefined; };
} },
{ placement: "aboveEditor" },
);
this.widgetRegistered = true;
} else if (this.tui) {
// Widget already registered — just request a re-render
this.tui.requestRender();
}
}
dispose() {
if (this.widgetInterval) {
clearInterval(this.widgetInterval);
this.widgetInterval = undefined;
}
if (this.uiCtx) {
this.uiCtx.setWidget("tasks", undefined);
}
this.widgetRegistered = false;
this.tui = undefined;
}
} }
+271 -244
View File
@@ -4,318 +4,345 @@ import { AutoClearManager } from "../src/auto-clear.js";
import { TaskStore } from "../src/task-store.js"; import { TaskStore } from "../src/task-store.js";
describe("auto-clear: on_task_complete mode", () => { describe("auto-clear: on_task_complete mode", () => {
let store: TaskStore; let store: TaskStore;
let manager: AutoClearManager; let manager: AutoClearManager;
beforeEach(() => { beforeEach(() => {
store = new TaskStore(); store = new TaskStore();
manager = new AutoClearManager(() => store, () => "on_task_complete"); manager = new AutoClearManager(
}); () => store,
() => "on_task_complete",
);
});
it("does not clear completed task before REMINDER_INTERVAL turns", () => { it("does not clear completed task before REMINDER_INTERVAL turns", () => {
store.create("Task", "Desc", "done"); store.create("Task", "Desc", "done");
store.complete("1"); store.complete("1");
manager.trackCompletion("1", 1); manager.trackCompletion("1", 1);
// Turns 2, 3, 4 — not enough // Turns 2, 3, 4 — not enough
for (let turn = 2; turn <= 4; turn++) { for (let turn = 2; turn <= 4; turn++) {
manager.onTurnStart(turn); manager.onTurnStart(turn);
} }
expect(store.get("1")).toBeDefined(); expect(store.get("1")).toBeDefined();
expect(store.get("1")!.status).toBe("completed"); expect(store.get("1")!.status).toBe("completed");
}); });
it("clears completed task after REMINDER_INTERVAL turns", () => { it("clears completed task after REMINDER_INTERVAL turns", () => {
store.create("Task", "Desc", "done"); store.create("Task", "Desc", "done");
store.complete("1"); store.complete("1");
manager.trackCompletion("1", 1); manager.trackCompletion("1", 1);
// Turn 5 = turn 1 + 4 (REMINDER_INTERVAL) // Turn 5 = turn 1 + 4 (REMINDER_INTERVAL)
manager.onTurnStart(5); manager.onTurnStart(5);
expect(store.get("1")).toBeUndefined(); expect(store.get("1")).toBeUndefined();
expect(store.list()).toHaveLength(0); expect(store.list()).toHaveLength(0);
}); });
it("clears each task independently based on its own completion turn", () => { it("clears each task independently based on its own completion turn", () => {
store.create("Task A", "Desc", "done"); store.create("Task A", "Desc", "done");
store.create("Task B", "Desc", "done"); store.create("Task B", "Desc", "done");
store.complete("1"); store.complete("1");
manager.trackCompletion("1", 1); manager.trackCompletion("1", 1);
store.complete("2"); store.complete("2");
manager.trackCompletion("2", 3); manager.trackCompletion("2", 3);
// Turn 5: Task A expires (1+4), Task B still lingers (3+4=7) // Turn 5: Task A expires (1+4), Task B still lingers (3+4=7)
manager.onTurnStart(5); manager.onTurnStart(5);
expect(store.get("1")).toBeUndefined(); expect(store.get("1")).toBeUndefined();
expect(store.get("2")).toBeDefined(); expect(store.get("2")).toBeDefined();
// Turn 7: Task B expires // Turn 7: Task B expires
manager.onTurnStart(7); manager.onTurnStart(7);
expect(store.get("2")).toBeUndefined(); expect(store.get("2")).toBeUndefined();
}); });
it("does not clear pending or in_progress tasks", () => { it("does not clear pending or in_progress tasks", () => {
store.create("Pending", "Desc", "done"); store.create("Pending", "Desc", "done");
store.create("In Progress", "Desc", "done"); store.create("In Progress", "Desc", "done");
store.create("Completed", "Desc", "done"); store.create("Completed", "Desc", "done");
store.update("2", { status: "in_progress" }); store.update("2", { status: "in_progress" });
store.complete("3"); store.complete("3");
manager.trackCompletion("3", 1); manager.trackCompletion("3", 1);
manager.onTurnStart(5); manager.onTurnStart(5);
expect(store.get("1")).toBeDefined(); // pending — untouched expect(store.get("1")).toBeDefined(); // pending — untouched
expect(store.get("2")).toBeDefined(); // in_progress — untouched expect(store.get("2")).toBeDefined(); // in_progress — untouched
expect(store.get("3")).toBeUndefined(); // completed — cleared expect(store.get("3")).toBeUndefined(); // completed — cleared
}); });
it("cleans up dependency edges when auto-clearing", () => { it("cleans up dependency edges when auto-clearing", () => {
store.create("Blocker", "Desc", "done"); store.create("Blocker", "Desc", "done");
store.create("Blocked", "Desc", "done"); store.create("Blocked", "Desc", "done");
store.update("1", { add_blocks: ["2"] }); store.update("1", { add_blocks: ["2"] });
store.complete("1"); store.complete("1");
manager.trackCompletion("1", 1); manager.trackCompletion("1", 1);
manager.onTurnStart(5); manager.onTurnStart(5);
expect(store.get("1")).toBeUndefined(); expect(store.get("1")).toBeUndefined();
expect(store.get("2")!.blockedBy).toEqual([]); expect(store.get("2")!.blockedBy).toEqual([]);
}); });
it("returns true when tasks are cleared", () => { it("returns true when tasks are cleared", () => {
store.create("Task", "Desc", "done"); store.create("Task", "Desc", "done");
store.complete("1"); store.complete("1");
manager.trackCompletion("1", 1); manager.trackCompletion("1", 1);
expect(manager.onTurnStart(4)).toBe(false); expect(manager.onTurnStart(4)).toBe(false);
expect(manager.onTurnStart(5)).toBe(true); expect(manager.onTurnStart(5)).toBe(true);
}); });
}); });
describe("auto-clear: on_list_complete mode", () => { describe("auto-clear: on_list_complete mode", () => {
let store: TaskStore; let store: TaskStore;
let manager: AutoClearManager; let manager: AutoClearManager;
beforeEach(() => { beforeEach(() => {
store = new TaskStore(); store = new TaskStore();
manager = new AutoClearManager(() => store, () => "on_list_complete"); manager = new AutoClearManager(
}); () => store,
() => "on_list_complete",
);
});
it("does not clear when some tasks are still pending", () => { it("does not clear when some tasks are still pending", () => {
store.create("Done", "Desc", "done"); store.create("Done", "Desc", "done");
store.create("Pending", "Desc", "done"); store.create("Pending", "Desc", "done");
store.complete("1"); store.complete("1");
manager.trackCompletion("1", 1); manager.trackCompletion("1", 1);
for (let turn = 2; turn <= 10; turn++) { for (let turn = 2; turn <= 10; turn++) {
manager.onTurnStart(turn); manager.onTurnStart(turn);
} }
expect(store.get("1")).toBeDefined(); expect(store.get("1")).toBeDefined();
expect(store.list()).toHaveLength(2); expect(store.list()).toHaveLength(2);
}); });
it("does not clear immediately when all tasks complete", () => { it("does not clear immediately when all tasks complete", () => {
store.create("A", "Desc", "done"); store.create("A", "Desc", "done");
store.create("B", "Desc", "done"); store.create("B", "Desc", "done");
store.complete("1"); store.complete("1");
store.complete("2"); store.complete("2");
manager.trackCompletion("2", 1); manager.trackCompletion("2", 1);
// Turns 2-4: not enough // Turns 2-4: not enough
for (let turn = 2; turn <= 4; turn++) { for (let turn = 2; turn <= 4; turn++) {
manager.onTurnStart(turn); manager.onTurnStart(turn);
} }
expect(store.list()).toHaveLength(2); expect(store.list()).toHaveLength(2);
}); });
it("clears all completed tasks after REMINDER_INTERVAL turns when all are completed", () => { it("clears all completed tasks after REMINDER_INTERVAL turns when all are completed", () => {
store.create("A", "Desc", "done"); store.create("A", "Desc", "done");
store.create("B", "Desc", "done"); store.create("B", "Desc", "done");
store.complete("1"); store.complete("1");
store.complete("2"); store.complete("2");
manager.trackCompletion("2", 1); manager.trackCompletion("2", 1);
manager.onTurnStart(5); manager.onTurnStart(5);
expect(store.list()).toHaveLength(0); expect(store.list()).toHaveLength(0);
}); });
it("resets countdown when a new task is created before REMINDER_INTERVAL", () => { it("resets countdown when a new task is created before REMINDER_INTERVAL", () => {
store.create("A", "Desc", "done"); store.create("A", "Desc", "done");
store.complete("1"); store.complete("1");
manager.trackCompletion("1", 1); manager.trackCompletion("1", 1);
// Turn 3: new task created — reset countdown // Turn 3: new task created — reset countdown
manager.onTurnStart(3); manager.onTurnStart(3);
manager.resetBatchCountdown(); manager.resetBatchCountdown();
store.create("B", "Desc", "done"); store.create("B", "Desc", "done");
// Turn 5 would have cleared, but countdown was reset at turn 3 // Turn 5 would have cleared, but countdown was reset at turn 3
manager.onTurnStart(5); manager.onTurnStart(5);
expect(store.get("1")).toBeDefined(); // still around — list isn't all completed expect(store.get("1")).toBeDefined(); // still around — list isn't all completed
}); });
it("resets countdown when a task goes back to in_progress", () => { it("resets countdown when a task goes back to in_progress", () => {
store.create("A", "Desc", "done"); store.create("A", "Desc", "done");
store.create("B", "Desc", "done"); store.create("B", "Desc", "done");
store.complete("1"); store.complete("1");
store.complete("2"); store.complete("2");
manager.trackCompletion("2", 1); manager.trackCompletion("2", 1);
// Turn 3: task 2 goes back to in_progress // Turn 3: task 2 goes back to in_progress
manager.onTurnStart(3); manager.onTurnStart(3);
store.update("2", { status: "in_progress" }); store.update("2", { status: "in_progress" });
manager.resetBatchCountdown(); manager.resetBatchCountdown();
// Turn 5: would have cleared, but countdown was reset // Turn 5: would have cleared, but countdown was reset
manager.onTurnStart(5); manager.onTurnStart(5);
expect(store.list()).toHaveLength(2); // both still here expect(store.list()).toHaveLength(2); // both still here
}); });
it("returns true when tasks are cleared", () => { it("returns true when tasks are cleared", () => {
store.create("Task", "Desc", "done"); store.create("Task", "Desc", "done");
store.complete("1"); store.complete("1");
manager.trackCompletion("1", 1); manager.trackCompletion("1", 1);
expect(manager.onTurnStart(4)).toBe(false); expect(manager.onTurnStart(4)).toBe(false);
expect(manager.onTurnStart(5)).toBe(true); expect(manager.onTurnStart(5)).toBe(true);
}); });
}); });
describe("auto-clear: never mode", () => { describe("auto-clear: never mode", () => {
let store: TaskStore; let store: TaskStore;
let manager: AutoClearManager; let manager: AutoClearManager;
beforeEach(() => { beforeEach(() => {
store = new TaskStore(); store = new TaskStore();
manager = new AutoClearManager(() => store, () => "never"); manager = new AutoClearManager(
}); () => store,
() => "never",
);
});
it("never clears completed tasks regardless of turns", () => { it("never clears completed tasks regardless of turns", () => {
store.create("A", "Desc", "done"); store.create("A", "Desc", "done");
store.create("B", "Desc", "done"); store.create("B", "Desc", "done");
store.complete("1"); store.complete("1");
store.complete("2"); store.complete("2");
manager.trackCompletion("1", 1); manager.trackCompletion("1", 1);
manager.trackCompletion("2", 1); manager.trackCompletion("2", 1);
for (let turn = 2; turn <= 20; turn++) { for (let turn = 2; turn <= 20; turn++) {
manager.onTurnStart(turn); manager.onTurnStart(turn);
} }
expect(store.list()).toHaveLength(2); expect(store.list()).toHaveLength(2);
}); });
it("trackCompletion is a no-op", () => { it("trackCompletion is a no-op", () => {
store.create("Task", "Desc", "done"); store.create("Task", "Desc", "done");
store.complete("1"); store.complete("1");
manager.trackCompletion("1", 1); manager.trackCompletion("1", 1);
manager.onTurnStart(100); manager.onTurnStart(100);
expect(store.get("1")).toBeDefined(); expect(store.get("1")).toBeDefined();
}); });
}); });
describe("auto-clear: dynamic mode switching", () => { describe("auto-clear: dynamic mode switching", () => {
it("respects mode changes via getMode callback", () => { it("respects mode changes via getMode callback", () => {
const store = new TaskStore(); const store = new TaskStore();
let mode: AutoClearMode = "never"; let mode: AutoClearMode = "never";
const manager = new AutoClearManager(() => store, () => mode); const manager = new AutoClearManager(
() => store,
() => mode,
);
store.create("Task", "Desc", "done"); store.create("Task", "Desc", "done");
store.complete("1"); store.complete("1");
// Track in never mode — no-op // Track in never mode — no-op
manager.trackCompletion("1", 1); manager.trackCompletion("1", 1);
manager.onTurnStart(5); manager.onTurnStart(5);
expect(store.get("1")).toBeDefined(); expect(store.get("1")).toBeDefined();
// Switch to on_task_complete and re-track // Switch to on_task_complete and re-track
mode = "on_task_complete"; mode = "on_task_complete";
manager.trackCompletion("1", 5); manager.trackCompletion("1", 5);
manager.onTurnStart(9); manager.onTurnStart(9);
expect(store.get("1")).toBeUndefined(); expect(store.get("1")).toBeUndefined();
}); });
}); });
describe("auto-clear: store getter (session switch)", () => { describe("auto-clear: store getter (session switch)", () => {
it("operates on the current store after swap", () => { it("operates on the current store after swap", () => {
let store = new TaskStore(); let store = new TaskStore();
const manager = new AutoClearManager(() => store, () => "on_task_complete"); const manager = new AutoClearManager(
() => store,
() => "on_task_complete",
);
store.create("Old task", "Desc", "done"); store.create("Old task", "Desc", "done");
store.complete("1"); store.complete("1");
manager.trackCompletion("1", 1); manager.trackCompletion("1", 1);
// Simulate session switch — swap store // Simulate session switch — swap store
store = new TaskStore(); store = new TaskStore();
store.create("New task", "Desc", "done"); store.create("New task", "Desc", "done");
manager.reset(); manager.reset();
// Old task tracking was reset, new store has no completed tasks // Old task tracking was reset, new store has no completed tasks
manager.onTurnStart(5); manager.onTurnStart(5);
expect(store.list()).toHaveLength(1); expect(store.list()).toHaveLength(1);
expect(store.get("1")!.subject).toBe("New task"); expect(store.get("1")!.subject).toBe("New task");
}); });
it("clears from new store, not old store", () => { it("clears from new store, not old store", () => {
let store = new TaskStore(); let store = new TaskStore();
const manager = new AutoClearManager(() => store, () => "on_task_complete"); const manager = new AutoClearManager(
() => store,
() => "on_task_complete",
);
// Swap to new store with a completed task // Swap to new store with a completed task
store = new TaskStore(); store = new TaskStore();
store.create("Task in new store", "Desc", "done"); store.create("Task in new store", "Desc", "done");
store.complete("1"); store.complete("1");
manager.trackCompletion("1", 1); manager.trackCompletion("1", 1);
manager.onTurnStart(5); manager.onTurnStart(5);
expect(store.get("1")).toBeUndefined(); // cleared from new store expect(store.get("1")).toBeUndefined(); // cleared from new store
}); });
}); });
describe("auto-clear: reset (new session)", () => { describe("auto-clear: reset (new session)", () => {
it("reset clears per-task tracking so old completions don't fire", () => { it("reset clears per-task tracking so old completions don't fire", () => {
const store = new TaskStore(); const store = new TaskStore();
const manager = new AutoClearManager(() => store, () => "on_task_complete"); const manager = new AutoClearManager(
() => store,
() => "on_task_complete",
);
store.create("Task", "Desc", "done"); store.create("Task", "Desc", "done");
store.complete("1"); store.complete("1");
manager.trackCompletion("1", 1); manager.trackCompletion("1", 1);
// Simulate /new — reset before the delay expires // Simulate /new — reset before the delay expires
manager.reset(); manager.reset();
// Old completion should NOT trigger after reset // Old completion should NOT trigger after reset
manager.onTurnStart(5); manager.onTurnStart(5);
expect(store.get("1")).toBeDefined(); expect(store.get("1")).toBeDefined();
}); });
it("reset clears batch countdown so old all-completed state doesn't fire", () => { it("reset clears batch countdown so old all-completed state doesn't fire", () => {
const store = new TaskStore(); const store = new TaskStore();
const manager = new AutoClearManager(() => store, () => "on_list_complete"); const manager = new AutoClearManager(
() => store,
() => "on_list_complete",
);
store.create("Task", "Desc", "done"); store.create("Task", "Desc", "done");
store.complete("1"); store.complete("1");
manager.trackCompletion("1", 1); manager.trackCompletion("1", 1);
// Simulate /new — reset before the delay expires // Simulate /new — reset before the delay expires
manager.reset(); manager.reset();
// Old batch countdown should NOT trigger after reset // Old batch countdown should NOT trigger after reset
manager.onTurnStart(5); manager.onTurnStart(5);
expect(store.get("1")).toBeDefined(); expect(store.get("1")).toBeDefined();
}); });
it("tracking works normally after reset", () => { it("tracking works normally after reset", () => {
const store = new TaskStore(); const store = new TaskStore();
const manager = new AutoClearManager(() => store, () => "on_task_complete"); const manager = new AutoClearManager(
() => store,
() => "on_task_complete",
);
store.create("Task", "Desc", "done"); store.create("Task", "Desc", "done");
store.complete("1"); store.complete("1");
manager.trackCompletion("1", 1); manager.trackCompletion("1", 1);
manager.reset(); manager.reset();
// Re-track after reset with new turn baseline // Re-track after reset with new turn baseline
manager.trackCompletion("1", 10); manager.trackCompletion("1", 10);
manager.onTurnStart(14); manager.onTurnStart(14);
expect(store.get("1")).toBeUndefined(); expect(store.get("1")).toBeUndefined();
}); });
}); });
+128 -86
View File
@@ -2,123 +2,165 @@ import { describe, expect, it, vi } from "vitest";
import proofTasksExtension, { parseLgtmArgs } from "../src/index.js"; import proofTasksExtension, { parseLgtmArgs } from "../src/index.js";
type RegisteredTool = { type RegisteredTool = {
name: string; name: string;
execute: (...args: any[]) => Promise<any>; execute: (...args: any[]) => Promise<any>;
}; };
type RegisteredCommand = { type RegisteredCommand = {
handler: (args: string, ctx: any) => Promise<void>; handler: (args: string, ctx: any) => Promise<void>;
getArgumentCompletions?: (args: string) => Promise<string[]>; getArgumentCompletions?: (args: string) => Promise<string[]>;
}; };
function makeHarness() { function makeHarness() {
const tools = new Map<string, RegisteredTool>(); const tools = new Map<string, RegisteredTool>();
const commands = new Map<string, RegisteredCommand>(); const commands = new Map<string, RegisteredCommand>();
const sentMessages: any[] = []; const sentMessages: any[] = [];
const pi = { const pi = {
on: vi.fn(), on: vi.fn(),
registerTool: vi.fn((tool: RegisteredTool) => tools.set(tool.name, tool)), registerTool: vi.fn((tool: RegisteredTool) => tools.set(tool.name, tool)),
registerCommand: vi.fn((name: string, command: RegisteredCommand) => commands.set(name, command)), registerCommand: vi.fn((name: string, command: RegisteredCommand) =>
sendMessage: vi.fn((message: any) => sentMessages.push(message)), commands.set(name, command),
}; ),
sendMessage: vi.fn((message: any) => sentMessages.push(message)),
};
proofTasksExtension(pi as any); proofTasksExtension(pi as any);
async function execTool(name: string, params: Record<string, unknown>) { async function execTool(name: string, params: Record<string, unknown>) {
const tool = tools.get(name); const tool = tools.get(name);
if (!tool) throw new Error(`Tool ${name} not registered`); if (!tool) throw new Error(`Tool ${name} not registered`);
return tool.execute("tool-call", params, undefined, undefined, {}); return tool.execute("tool-call", params, undefined, undefined, {});
} }
function makeUi(overrides: { function makeUi(
select?: Array<string | undefined>; overrides: {
confirm?: Array<boolean>; select?: Array<string | undefined>;
} = {}) { confirm?: Array<boolean>;
const selectQueue = [...(overrides.select ?? [])]; } = {},
const confirmQueue = [...(overrides.confirm ?? [])]; ) {
return { const selectQueue = [...(overrides.select ?? [])];
notify: vi.fn(), const confirmQueue = [...(overrides.confirm ?? [])];
select: vi.fn(async () => selectQueue.shift()), return {
confirm: vi.fn(async () => confirmQueue.shift() ?? false), notify: vi.fn(),
}; select: vi.fn(async () => selectQueue.shift()),
} confirm: vi.fn(async () => confirmQueue.shift() ?? false),
};
}
return { tools, commands, sentMessages, execTool, makeUi }; return { tools, commands, sentMessages, execTool, makeUi };
} }
describe("parseLgtmArgs", () => { describe("parseLgtmArgs", () => {
it("parses menu and view forms", () => { it("parses menu and view forms", () => {
expect(parseLgtmArgs("")).toEqual({ kind: "menu" }); expect(parseLgtmArgs("")).toEqual({ kind: "menu" });
expect(parseLgtmArgs("*")).toEqual({ kind: "view_all" }); expect(parseLgtmArgs("*")).toEqual({ kind: "view_all" });
expect(parseLgtmArgs("1 #2")).toEqual({ kind: "view", ids: ["1", "2"] }); expect(parseLgtmArgs("1 #2")).toEqual({ kind: "view", ids: ["1", "2"] });
}); });
it("rejects task-management forms", () => { it("rejects task-management forms", () => {
expect(parseLgtmArgs("clear")).toEqual({ kind: "error", message: "Task management lives in /tasks now. /lgtm is viewer-only." }); expect(parseLgtmArgs("clear")).toEqual({
expect(parseLgtmArgs("clear *")).toEqual({ kind: "error", message: "Task management lives in /tasks now. /lgtm is viewer-only." }); kind: "error",
expect(parseLgtmArgs("clear #7")).toEqual({ kind: "error", message: "Task management lives in /tasks now. /lgtm is viewer-only." }); message: "Task management lives in /tasks now. /lgtm is viewer-only.",
expect(parseLgtmArgs("delete #7")).toEqual({ kind: "error", message: "Task management lives in /tasks now. /lgtm is viewer-only." }); });
}); expect(parseLgtmArgs("clear *")).toEqual({
kind: "error",
message: "Task management lives in /tasks now. /lgtm is viewer-only.",
});
expect(parseLgtmArgs("clear #7")).toEqual({
kind: "error",
message: "Task management lives in /tasks now. /lgtm is viewer-only.",
});
expect(parseLgtmArgs("delete #7")).toEqual({
kind: "error",
message: "Task management lives in /tasks now. /lgtm is viewer-only.",
});
});
}); });
describe("/lgtm command", () => { describe("/lgtm command", () => {
it("shows all open proof logs from the picker", async () => { it("shows all open proof logs from the picker", async () => {
const harness = makeHarness(); const harness = makeHarness();
await harness.execTool("TaskCreate", { subject: "Task A", description: "Desc", done_criterion: "done" }); await harness.execTool("TaskCreate", {
await harness.execTool("TaskCreate", { subject: "Task B", description: "Desc", done_criterion: "done" }); subject: "Task A",
description: "Desc",
done_criterion: "done",
});
await harness.execTool("TaskCreate", {
subject: "Task B",
description: "Desc",
done_criterion: "done",
});
const ui = harness.makeUi({ select: ["View all open proof logs"] }); const ui = harness.makeUi({ select: ["View all open proof logs"] });
const command = harness.commands.get("lgtm"); const command = harness.commands.get("lgtm");
if (!command) throw new Error("/lgtm not registered"); if (!command) throw new Error("/lgtm not registered");
await command.handler("", { ui }); await command.handler("", { ui });
expect(harness.sentMessages).toHaveLength(2); expect(harness.sentMessages).toHaveLength(2);
expect(harness.sentMessages[0].customType).toBe("proof-log"); expect(harness.sentMessages[0].customType).toBe("proof-log");
expect(harness.sentMessages[0].content).toContain("Task #1"); expect(harness.sentMessages[0].content).toContain("Task #1");
expect(harness.sentMessages[1].content).toContain("Task #2"); expect(harness.sentMessages[1].content).toContain("Task #2");
}); });
it("shows one proof log from the picker", async () => { it("shows one proof log from the picker", async () => {
const harness = makeHarness(); const harness = makeHarness();
await harness.execTool("TaskCreate", { subject: "Task A", description: "Desc", done_criterion: "done" }); await harness.execTool("TaskCreate", {
subject: "Task A",
description: "Desc",
done_criterion: "done",
});
const ui = harness.makeUi({ select: ["[PENDING] #1 Task A"] }); const ui = harness.makeUi({ select: ["[PENDING] #1 Task A"] });
const command = harness.commands.get("lgtm"); const command = harness.commands.get("lgtm");
if (!command) throw new Error("/lgtm not registered"); if (!command) throw new Error("/lgtm not registered");
await command.handler("", { ui }); await command.handler("", { ui });
expect(harness.sentMessages).toHaveLength(1); expect(harness.sentMessages).toHaveLength(1);
expect(harness.sentMessages[0].content).toContain("Task #1"); expect(harness.sentMessages[0].content).toContain("Task #1");
}); });
it("rejects /lgtm clear and points task management back to /tasks", async () => { it("rejects /lgtm clear and points task management back to /tasks", async () => {
const harness = makeHarness(); const harness = makeHarness();
await harness.execTool("TaskCreate", { subject: "Task A", description: "Desc", done_criterion: "done" }); await harness.execTool("TaskCreate", {
subject: "Task A",
description: "Desc",
done_criterion: "done",
});
const ui = harness.makeUi(); const ui = harness.makeUi();
const command = harness.commands.get("lgtm"); const command = harness.commands.get("lgtm");
if (!command) throw new Error("/lgtm not registered"); if (!command) throw new Error("/lgtm not registered");
await command.handler("clear 1", { ui }); await command.handler("clear 1", { ui });
expect(harness.sentMessages).toHaveLength(0); expect(harness.sentMessages).toHaveLength(0);
expect(ui.notify).toHaveBeenCalledWith("Task management lives in /tasks now. /lgtm is viewer-only.", "error"); expect(ui.notify).toHaveBeenCalledWith(
}); "Task management lives in /tasks now. /lgtm is viewer-only.",
"error",
);
});
it("rejects /lgtm delete and points task management back to /tasks", async () => { it("rejects /lgtm delete and points task management back to /tasks", async () => {
const harness = makeHarness(); const harness = makeHarness();
await harness.execTool("TaskCreate", { subject: "Task A", description: "Desc", done_criterion: "done" }); await harness.execTool("TaskCreate", {
subject: "Task A",
description: "Desc",
done_criterion: "done",
});
const ui = harness.makeUi(); const ui = harness.makeUi();
const command = harness.commands.get("lgtm"); const command = harness.commands.get("lgtm");
if (!command) throw new Error("/lgtm not registered"); if (!command) throw new Error("/lgtm not registered");
await command.handler("delete 1", { ui }); await command.handler("delete 1", { ui });
expect(harness.sentMessages).toHaveLength(0); expect(harness.sentMessages).toHaveLength(0);
expect(ui.notify).toHaveBeenCalledWith("Task management lives in /tasks now. /lgtm is viewer-only.", "error"); expect(ui.notify).toHaveBeenCalledWith(
}); "Task management lives in /tasks now. /lgtm is viewer-only.",
"error",
);
});
}); });
+124 -129
View File
@@ -1,150 +1,145 @@
import { describe, expect, it } from "vitest"; import { describe, expect, it } from "vitest";
import { getCompletionMode, getDisplayStatus, getGateStatus, getReviewBadges, getReviewState } from "../src/review-badges.js"; import {
getCompletionMode,
getDisplayStatus,
getGateStatus,
getReviewState,
} from "../src/review-badges.js";
import type { Task } from "../src/types.js"; import type { Task } from "../src/types.js";
function makeTask(overrides: Partial<Task> = {}): Task { function makeTask(overrides: Partial<Task> = {}): Task {
return { return {
id: "1", id: "1",
subject: "Test", subject: "Test",
description: "Desc", description: "Desc",
done_criterion: "done", done_criterion: "done",
status: "pending", status: "pending",
progress_label: undefined, progress_label: undefined,
metadata: {}, metadata: {},
blocks: [], blocks: [],
blockedBy: [], blockedBy: [],
createdAt: 0, createdAt: 0,
updatedAt: 0, updatedAt: 0,
...overrides, ...overrides,
}; };
} }
describe("getReviewBadges", () => {
it("renders all dots when no artifacts exist", () => {
expect(getReviewBadges(makeTask())).toBe("[···]");
});
it("fills evidence/review/completed slots independently", () => {
const task = makeTask({
metadata: {
lgtm_evidence: "npm test",
robot_reviews: [{
iteration: 1,
reviewer: "opencode",
scope: "task evidence",
observations: ["Observed one unchecked edge case"],
concerns: ["Evidence does not cover prod traffic."],
suggestions: ["Inspect one prod traffic sample."],
blind_spots: "Did not inspect prod traffic",
accepted: false,
evidence_complete: false,
evidence_convincing: false,
missing_evidence: ["Prod traffic sample"],
submitted_at: "2026-04-17T00:00:00.000Z",
mode: "manual",
}],
},
});
expect(getReviewBadges(task)).toBe("[🛠🤖·]");
});
it("fills the completed badge once the task is completed", () => {
const task = makeTask({
status: "completed",
metadata: { lgtm_evidence: "ok" },
});
expect(getReviewBadges(task)).toBe("[🛠·✓]");
});
});
describe("review state helpers", () => { describe("review state helpers", () => {
it("reports completion mode as proof for top-level tasks", () => { it("reports completion mode as proof for top-level tasks", () => {
expect(getCompletionMode(makeTask())).toBe("proof"); expect(getCompletionMode(makeTask())).toBe("proof");
}); });
it("reports completion mode as direct for subtasks", () => { it("reports completion mode as direct for subtasks", () => {
expect(getCompletionMode(makeTask({ parentId: "1" }))).toBe("direct"); expect(getCompletionMode(makeTask({ parentId: "1" }))).toBe("direct");
}); });
it("reports superseded when only history remains", () => { it("reports superseded when only history remains", () => {
expect(getReviewState(makeTask({ metadata: { lgtm_history: [{ iteration: 1 }] } }))).toBe("superseded"); expect(
}); getReviewState(
makeTask({ metadata: { lgtm_history: [{ iteration: 1 }] } }),
),
).toBe("superseded");
});
}); });
describe("getGateStatus", () => { describe("getGateStatus", () => {
it("reports top-level proof requirement before evidence", () => { it("reports top-level proof requirement before evidence", () => {
expect(getGateStatus(makeTask())).toBe("top-level task requires TaskClaimDone evidence before completion"); expect(getGateStatus(makeTask())).toBe(
}); "top-level task requires TaskClaimDone evidence before completion",
);
});
it("reports non-blocking reviewer failure", () => { it("reports non-blocking reviewer failure", () => {
expect(getGateStatus(makeTask({ expect(
metadata: { getGateStatus(
lgtm_evidence: "ok", makeTask({
robot_review_last_error: "Unexpected token 'a'", metadata: {
}, lgtm_evidence: "ok",
}))).toContain("review unavailable; autonomy continues"); robot_review_last_error: "Unexpected token 'a'",
}); },
}),
),
).toContain("review unavailable; autonomy continues");
});
it("reports rejected robot review when latest review does not accept", () => { it("reports rejected robot review when latest review does not accept", () => {
expect(getGateStatus(makeTask({ expect(
metadata: { getGateStatus(
lgtm_evidence: "ok", makeTask({
robot_reviews: [{ metadata: {
iteration: 1, lgtm_evidence: "ok",
reviewer: "opencode", robot_reviews: [
scope: "task evidence", {
observations: ["Observed missing output"], iteration: 1,
concerns: ["The current evidence is summary-only."], reviewer: "opencode",
suggestions: ["Paste the literal output."], scope: "task evidence",
blind_spots: "none", observations: ["Observed missing output"],
accepted: false, concerns: ["The current evidence is summary-only."],
evidence_complete: false, suggestions: ["Paste the literal output."],
evidence_convincing: false, blind_spots: "none",
missing_evidence: ["literal output"], accepted: false,
submitted_at: "2026-04-17T00:00:00.000Z", evidence_complete: false,
mode: "manual", evidence_convincing: false,
}], missing_evidence: ["literal output"],
}, submitted_at: "2026-04-17T00:00:00.000Z",
}))).toBe("latest proof review rejected the evidence; strengthen the proof and try again"); mode: "manual",
}); },
],
},
}),
),
).toBe(
"latest proof review rejected the evidence; strengthen the proof and try again",
);
});
it("keeps rejection higher priority than a later reviewer warning", () => { it("keeps rejection higher priority than a later reviewer warning", () => {
expect(getGateStatus(makeTask({ expect(
metadata: { getGateStatus(
lgtm_evidence: "ok", makeTask({
robot_review_last_error: "timeout", metadata: {
robot_reviews: [{ lgtm_evidence: "ok",
iteration: 1, robot_review_last_error: "timeout",
reviewer: "opencode", robot_reviews: [
scope: "task evidence", {
observations: ["Observed missing output"], iteration: 1,
concerns: ["The current evidence is summary-only."], reviewer: "opencode",
suggestions: ["Paste the literal output."], scope: "task evidence",
blind_spots: "none", observations: ["Observed missing output"],
accepted: false, concerns: ["The current evidence is summary-only."],
evidence_complete: false, suggestions: ["Paste the literal output."],
evidence_convincing: false, blind_spots: "none",
missing_evidence: ["literal output"], accepted: false,
submitted_at: "2026-04-17T00:00:00.000Z", evidence_complete: false,
mode: "manual", evidence_convincing: false,
}], missing_evidence: ["literal output"],
}, submitted_at: "2026-04-17T00:00:00.000Z",
}))).toBe("latest proof review rejected the evidence; strengthen the proof and try again"); mode: "manual",
}); },
],
},
}),
),
).toBe(
"latest proof review rejected the evidence; strengthen the proof and try again",
);
});
}); });
describe("getDisplayStatus", () => { describe("getDisplayStatus", () => {
it("returns pending for fresh tasks", () => { it("returns pending for fresh tasks", () => {
expect(getDisplayStatus(makeTask())).toBe("pending"); expect(getDisplayStatus(makeTask())).toBe("pending");
}); });
it("returns in_progress for active tasks not yet escalated", () => { it("returns in_progress for active tasks not yet escalated", () => {
expect(getDisplayStatus(makeTask({ status: "in_progress" }))).toBe("in_progress"); expect(getDisplayStatus(makeTask({ status: "in_progress" }))).toBe(
}); "in_progress",
);
});
it("returns completed for completed tasks", () => { it("returns completed for completed tasks", () => {
expect(getDisplayStatus(makeTask({ status: "completed" }))).toBe("completed"); expect(getDisplayStatus(makeTask({ status: "completed" }))).toBe(
}); "completed",
);
});
}); });
+102 -65
View File
@@ -1,78 +1,115 @@
import { describe, expect, it } from "vitest"; import { describe, expect, it } from "vitest";
import { import {
DEFAULT_ROBOT_REVIEW_TIMEOUT_MS, DEFAULT_ROBOT_REVIEW_TIMEOUT_MS,
extractFinalAssistantTextFromPiJsonl, extractFinalAssistantTextFromPiJsonl,
extractRobotReviewJson, extractRobotReviewJson,
getCurrentModelRef, getCurrentModelRef,
getPiInvocation, getPiInvocation,
getRobotReviewTimeoutMs, getRobotReviewTimeoutMs,
runRobotReviewCommand, runRobotReviewCommand,
} from "../src/index.js"; } from "../src/index.js";
describe("robot review runner helpers", () => { describe("robot review runner helpers", () => {
it("uses plain pi by default and allows override", () => { it("uses plain pi by default and allows override", () => {
expect(getPiInvocation(["--mode", "json"], {} as NodeJS.ProcessEnv)).toEqual({ expect(
command: "pi", getPiInvocation(["--mode", "json"], {} as NodeJS.ProcessEnv),
args: ["--mode", "json"], ).toEqual({
}); command: "pi",
expect(getPiInvocation(["-p"], { PI_PROOF_TASKS_PI_BIN: "/custom/pi" } as NodeJS.ProcessEnv)).toEqual({ args: ["--mode", "json"],
command: "/custom/pi", });
args: ["-p"], expect(
}); getPiInvocation(["-p"], {
}); PI_PROOF_TASKS_PI_BIN: "/custom/pi",
} as NodeJS.ProcessEnv),
).toEqual({
command: "/custom/pi",
args: ["-p"],
});
});
it("parses the final assistant text from pi jsonl", () => { it("parses the final assistant text from pi jsonl", () => {
const output = [ const output = [
"{\"type\":\"message_update\"}", '{"type":"message_update"}',
"{\"type\":\"message_end\",\"message\":{\"role\":\"assistant\",\"content\":[{\"type\":\"text\",\"text\":\"ROBOT_REVIEW_JSON_START {\\\"accepted\\\":true} ROBOT_REVIEW_JSON_END\"}]}}", '{"type":"message_end","message":{"role":"assistant","content":[{"type":"text","text":"ROBOT_REVIEW_JSON_START {\\"accepted\\":true} ROBOT_REVIEW_JSON_END"}]}}',
].join("\n"); ].join("\n");
expect(extractFinalAssistantTextFromPiJsonl(output)).toContain("ROBOT_REVIEW_JSON_START"); expect(extractFinalAssistantTextFromPiJsonl(output)).toContain(
}); "ROBOT_REVIEW_JSON_START",
);
});
it("parses noisy JSON wrapped in review markers", () => { it("parses noisy JSON wrapped in review markers", () => {
const output = [ const output = [
"ROBOT_REVIEW_JSON_START", "ROBOT_REVIEW_JSON_START",
"and here is the JSON you asked for:", "and here is the JSON you asked for:",
"```json", "```json",
'{"accepted":true,"observations":["ok"]}', '{"accepted":true,"observations":["ok"]}',
"```", "```",
"ROBOT_REVIEW_JSON_END", "ROBOT_REVIEW_JSON_END",
].join("\n"); ].join("\n");
expect(extractRobotReviewJson(output)).toEqual({ accepted: true, observations: ["ok"] }); expect(extractRobotReviewJson(output)).toEqual({
}); accepted: true,
observations: ["ok"],
});
});
it("includes raw output context on parse failure", () => { it("includes raw output context on parse failure", () => {
expect(() => extractRobotReviewJson("ROBOT_REVIEW_JSON_START and nope ROBOT_REVIEW_JSON_END")).toThrow(/Raw output:/); expect(() =>
}); extractRobotReviewJson(
"ROBOT_REVIEW_JSON_START and nope ROBOT_REVIEW_JSON_END",
),
).toThrow(/Raw output:/);
});
it("uses configured timeout or falls back to default", () => { it("uses configured timeout or falls back to default", () => {
expect(getRobotReviewTimeoutMs({ PI_PROOF_TASKS_ROBOT_REVIEW_TIMEOUT_MS: "2500" } as NodeJS.ProcessEnv)).toBe(2500); expect(
expect(getRobotReviewTimeoutMs({ PI_PROOF_TASKS_ROBOT_REVIEW_TIMEOUT_MS: "bad" } as NodeJS.ProcessEnv)).toBe(DEFAULT_ROBOT_REVIEW_TIMEOUT_MS); getRobotReviewTimeoutMs({
}); PI_PROOF_TASKS_ROBOT_REVIEW_TIMEOUT_MS: "2500",
} as NodeJS.ProcessEnv),
).toBe(2500);
expect(
getRobotReviewTimeoutMs({
PI_PROOF_TASKS_ROBOT_REVIEW_TIMEOUT_MS: "bad",
} as NodeJS.ProcessEnv),
).toBe(DEFAULT_ROBOT_REVIEW_TIMEOUT_MS);
});
it("formats the current model as the reviewer model ref", () => { it("formats the current model as the reviewer model ref", () => {
expect(getCurrentModelRef({ provider: "openai", id: "gpt-5" })).toBe("openai/gpt-5"); expect(getCurrentModelRef({ provider: "openai", id: "gpt-5" })).toBe(
expect(getCurrentModelRef({ providerId: "anthropic", modelId: "claude-haiku" })).toBe("anthropic/claude-haiku"); "openai/gpt-5",
expect(getCurrentModelRef({ provider: "openai" })).toBeUndefined(); );
}); expect(
getCurrentModelRef({ providerId: "anthropic", modelId: "claude-haiku" }),
).toBe("anthropic/claude-haiku");
expect(getCurrentModelRef({ provider: "openai" })).toBeUndefined();
});
it("times out bounded child commands", async () => { it("times out bounded child commands", async () => {
await expect(runRobotReviewCommand({ await expect(
command: process.execPath, runRobotReviewCommand(
args: ["-e", "setTimeout(() => {}, 1000)"], {
}, undefined, 25)).rejects.toThrow(/timed out/i); command: process.execPath,
}); args: ["-e", "setTimeout(() => {}, 1000)"],
},
undefined,
25,
),
).rejects.toThrow(/timed out/i);
});
it("extracts assistant text from a child jsonl process", async () => { it("extracts assistant text from a child jsonl process", async () => {
const script = [ const script = [
"process.stdout.write(JSON.stringify({type:'message_update'}) + '\\n');", "process.stdout.write(JSON.stringify({type:'message_update'}) + '\\n');",
"process.stdout.write(JSON.stringify({type:'message_end',message:{role:'assistant',content:[{type:'text',text:'ROBOT_REVIEW_JSON_START {\\\"accepted\\\":true,\\\"observations\\\":[\\\"ok\\\"]} ROBOT_REVIEW_JSON_END'}]}}) + '\\n');", "process.stdout.write(JSON.stringify({type:'message_end',message:{role:'assistant',content:[{type:'text',text:'ROBOT_REVIEW_JSON_START {\\\"accepted\\\":true,\\\"observations\\\":[\\\"ok\\\"]} ROBOT_REVIEW_JSON_END'}]}}) + '\\n');",
].join(""); ].join("");
const result = await runRobotReviewCommand({ const result = await runRobotReviewCommand(
command: process.execPath, {
args: ["-e", script], command: process.execPath,
}, undefined, 500); args: ["-e", script],
expect(result.exitCode).toBe(0); },
expect(result.stdout).toContain("ROBOT_REVIEW_JSON_END"); undefined,
}); 500,
);
expect(result.exitCode).toBe(0);
expect(result.stdout).toContain("ROBOT_REVIEW_JSON_END");
});
}); });
+418 -260
View File
@@ -2,288 +2,446 @@ import { mkdtempSync, writeFileSync } from "node:fs";
import { tmpdir } from "node:os"; import { tmpdir } from "node:os";
import { join } from "node:path"; import { join } from "node:path";
import { describe, expect, it } from "vitest"; import { describe, expect, it } from "vitest";
import { archiveCurrentEvidence, buildArtifactRecords, buildRobotReviewPrompt, getCurrentEvidenceIteration, getEvidenceHistory, renderEvidencePacket, renderProofLog } from "../src/index.js"; import {
import { appendRobotReviewMetadata, getLatestRobotReview, getRobotReviews, hasCompleteProofClaim, relaxAdvisoryVerificationHints, shouldCompleteAfterAcceptedReview } from "../src/robot-review.js"; archiveCurrentEvidence,
buildArtifactRecords,
buildRobotReviewPrompt,
getCurrentEvidenceIteration,
getEvidenceHistory,
renderEvidencePacket,
renderProofLog,
} from "../src/index.js";
import {
appendRobotReviewMetadata,
getLatestRobotReview,
getRobotReviews,
hasCompleteProofClaim,
relaxAdvisoryVerificationHints,
shouldCompleteAfterAcceptedReview,
} from "../src/robot-review.js";
import type { Task } from "../src/types.js"; import type { Task } from "../src/types.js";
function makeTask(overrides: Partial<Task> = {}): Task { function makeTask(overrides: Partial<Task> = {}): Task {
return { return {
id: "1", id: "1",
subject: "Test", subject: "Test",
description: "Desc", description: "Desc",
done_criterion: "done", done_criterion: "done",
status: "pending", status: "pending",
progress_label: undefined, progress_label: undefined,
metadata: {}, metadata: {},
blocks: [], blocks: [],
blockedBy: [], blockedBy: [],
createdAt: 0, createdAt: 0,
updatedAt: 0, updatedAt: 0,
...overrides, ...overrides,
}; };
} }
describe("robot review helpers", () => { describe("robot review helpers", () => {
it("completes only after accepted review and complete proof claim", () => { it("completes only after accepted review and complete proof claim", () => {
const task = makeTask({ const task = makeTask({
metadata: { metadata: {
lgtm_evidence: "literal output", lgtm_evidence: "literal output",
lgtm_failure_likely: "wrong command", lgtm_failure_likely: "wrong command",
lgtm_failure_sneaky: "right output for wrong reason", lgtm_failure_sneaky: "right output for wrong reason",
lgtm_failure_unknown: "untested platform", lgtm_failure_unknown: "untested platform",
lgtm_falsification_test: "npm test\npass", lgtm_falsification_test: "npm test\npass",
lgtm_evidence_reasoning: "the test output rules out the named failures for this scope", lgtm_evidence_reasoning:
lgtm_verification_hints: ["test/robot-review.test.ts shows the expectation"], "the test output rules out the named failures for this scope",
lgtm_remaining_uncertainty: "does not test prod install", lgtm_verification_hints: [
}, "test/robot-review.test.ts shows the expectation",
}); ],
expect(hasCompleteProofClaim(task)).toBe(true); lgtm_remaining_uncertainty: "does not test prod install",
expect(shouldCompleteAfterAcceptedReview(task, true)).toBe(true); },
expect(shouldCompleteAfterAcceptedReview(task, false)).toBe(false); });
expect(shouldCompleteAfterAcceptedReview(makeTask({ metadata: { lgtm_evidence: "literal output" } }), true)).toBe(false); expect(hasCompleteProofClaim(task)).toBe(true);
}); expect(shouldCompleteAfterAcceptedReview(task, true)).toBe(true);
expect(shouldCompleteAfterAcceptedReview(task, false)).toBe(false);
expect(
shouldCompleteAfterAcceptedReview(
makeTask({ metadata: { lgtm_evidence: "literal output" } }),
true,
),
).toBe(false);
});
it("reads legacy single-review metadata", () => { it("reads legacy single-review metadata", () => {
const task = makeTask({ const task = makeTask({
metadata: { metadata: {
robot_review_reviewer: "opencode", robot_review_reviewer: "opencode",
robot_review_scope: "task evidence", robot_review_scope: "task evidence",
robot_review_observations: ["Observed no command output for the core claim"], robot_review_observations: [
robot_review_blind_spots: "Did not rerun tests", "Observed no command output for the core claim",
robot_review_submitted_at: "2026-04-17T00:00:00.000Z", ],
}, robot_review_blind_spots: "Did not rerun tests",
}); robot_review_submitted_at: "2026-04-17T00:00:00.000Z",
},
});
const reviews = getRobotReviews(task); const reviews = getRobotReviews(task);
expect(reviews).toHaveLength(1); expect(reviews).toHaveLength(1);
expect(reviews[0].reviewer).toBe("opencode"); expect(reviews[0].reviewer).toBe("opencode");
expect(reviews[0].iteration).toBe(1); expect(reviews[0].iteration).toBe(1);
expect(reviews[0].accepted).toBe(true); expect(reviews[0].accepted).toBe(true);
}); });
it("builds artifact records with absolute path and sha256", () => { it("builds artifact records with absolute path and sha256", () => {
const dir = mkdtempSync(join(tmpdir(), "pi-proof-tasks-")); const dir = mkdtempSync(join(tmpdir(), "pi-proof-tasks-"));
const path = join(dir, "evidence.log"); const path = join(dir, "evidence.log");
writeFileSync(path, "hello\n"); writeFileSync(path, "hello\n");
const [artifact] = buildArtifactRecords([path]); const [artifact] = buildArtifactRecords([path]);
expect(artifact.path).toBe(path); expect(artifact.path).toBe(path);
expect(artifact.bytes).toBe(6); expect(artifact.bytes).toBe(6);
expect(artifact.sha256).toHaveLength(64); expect(artifact.sha256).toHaveLength(64);
}); });
it("archives current evidence with reason", () => { it("archives current evidence with reason", () => {
const task = makeTask({ const task = makeTask({
metadata: { metadata: {
lgtm_evidence: "literal output", lgtm_evidence: "literal output",
lgtm_failure_likely: "wrong seed", lgtm_failure_likely: "wrong seed",
lgtm_failure_sneaky: "wrong threshold", lgtm_failure_sneaky: "wrong threshold",
lgtm_failure_unknown: "untested environment", lgtm_failure_unknown: "untested environment",
lgtm_falsification_test: "pytest -k check", lgtm_falsification_test: "pytest -k check",
lgtm_evidence_reasoning: "pytest output distinguishes the expected passing path from the named failures", lgtm_evidence_reasoning:
lgtm_verification_hints: ["see line 5"], "pytest output distinguishes the expected passing path from the named failures",
lgtm_remaining_uncertainty: "not load tested", lgtm_verification_hints: ["see line 5"],
lgtm_submitted_at: "2026-06-07T00:00:00.000Z", lgtm_remaining_uncertainty: "not load tested",
lgtm_commands: [{ cmd: "pytest", exit_code: 0 }], lgtm_submitted_at: "2026-06-07T00:00:00.000Z",
}, lgtm_commands: [{ cmd: "pytest", exit_code: 0 }],
}); },
});
const archived = archiveCurrentEvidence(task, "threshold changed"); const archived = archiveCurrentEvidence(task, "threshold changed");
const taskWithHistory = makeTask({ metadata: archived }); const taskWithHistory = makeTask({ metadata: archived });
expect(getCurrentEvidenceIteration(task)?.iteration).toBe(1); expect(getCurrentEvidenceIteration(task)?.iteration).toBe(1);
expect(getEvidenceHistory(taskWithHistory)).toHaveLength(1); expect(getEvidenceHistory(taskWithHistory)).toHaveLength(1);
expect(getEvidenceHistory(taskWithHistory)[0].supersede_reason).toBe("threshold changed"); expect(getEvidenceHistory(taskWithHistory)[0].supersede_reason).toBe(
}); "threshold changed",
);
});
it("treats verification hints as advisory when core evidence already passes", () => { it("treats advisory rubric failures as non-blocking when core evidence already passes", () => {
const review = relaxAdvisoryVerificationHints({ const review = relaxAdvisoryVerificationHints({
reviewer: "auto", reviewer: "auto",
scope: "task evidence", scope: "task evidence",
observations: ["Observed commit, push, and test logs"], observations: ["Observed commit, push, and test logs"],
concerns: [], concerns: [],
suggestions: [], suggestions: [],
blind_spots: "Did not inspect interactive UI", blind_spots: "Did not inspect interactive UI",
accepted: false, accepted: false,
evidence_complete: true, evidence_complete: true,
evidence_convincing: false, evidence_convincing: false,
missing_evidence: ["verification_hints_actionable"], missing_evidence: [
submitted_at: "2026-06-13T00:00:00.000Z", "verification_hints_actionable",
mode: "auto", "evidence_distinguishes_success",
rubric: { ],
evidence_covers_done_criterion: { reason: "verbatim logs match", pass: true }, submitted_at: "2026-06-13T00:00:00.000Z",
falsification_test_runnable: { reason: "command and output shown", pass: true }, mode: "auto",
failure_modes_addressed: { reason: "plausible top risks named", pass: true }, rubric: {
evidence_distinguishes_success: { reason: "evidence rules out named failures", pass: true }, evidence_covers_done_criterion: {
verification_hints_actionable: { reason: "paths are vague", pass: false }, reason: "verbatim logs match",
}, pass: true,
}); },
falsification_test_runnable: {
reason: "command and output shown",
pass: true,
},
failure_modes_addressed: {
reason: "plausible top risks named",
pass: true,
},
evidence_distinguishes_success: {
reason: "reasoning writeup is thin",
pass: false,
},
verification_hints_actionable: {
reason: "paths are vague",
pass: false,
},
},
});
expect(review.accepted).toBe(true); expect(review.accepted).toBe(true);
expect(review.evidence_convincing).toBe(true); expect(review.evidence_convincing).toBe(true);
expect(review.observations.at(-1)).toContain("treated as advisory"); expect(
expect(review.missing_evidence).toEqual([]); review.observations.some((item) => item.includes("treated as advisory")),
}); ).toBe(true);
expect(review.missing_evidence).toEqual([]);
});
it("does not relax verification hints unless the core rubric passes", () => { it("does not relax verification hints unless the core rubric passes", () => {
const review = relaxAdvisoryVerificationHints({ const review = relaxAdvisoryVerificationHints({
reviewer: "auto", reviewer: "auto",
scope: "task evidence", scope: "task evidence",
observations: ["Observed vague summary only"], observations: ["Observed vague summary only"],
concerns: [], concerns: [],
suggestions: [], suggestions: [],
blind_spots: "Did not rerun tests", blind_spots: "Did not rerun tests",
accepted: false, accepted: false,
evidence_complete: true, evidence_complete: true,
evidence_convincing: false, evidence_convincing: false,
missing_evidence: ["verification_hints_actionable"], missing_evidence: ["verification_hints_actionable"],
submitted_at: "2026-06-13T00:00:00.000Z", submitted_at: "2026-06-13T00:00:00.000Z",
mode: "auto", mode: "auto",
rubric: { rubric: {
evidence_covers_done_criterion: { reason: "summary only", pass: false }, evidence_covers_done_criterion: { reason: "summary only", pass: false },
falsification_test_runnable: { reason: "command and output shown", pass: true }, falsification_test_runnable: {
failure_modes_addressed: { reason: "plausible top risks named", pass: true }, reason: "command and output shown",
evidence_distinguishes_success: { reason: "evidence does not rule out summary-only failure", pass: false }, pass: true,
verification_hints_actionable: { reason: "paths are vague", pass: false }, },
}, failure_modes_addressed: {
}); reason: "plausible top risks named",
pass: true,
},
evidence_distinguishes_success: {
reason: "evidence does not rule out summary-only failure",
pass: false,
},
verification_hints_actionable: {
reason: "paths are vague",
pass: false,
},
},
});
expect(review.accepted).toBe(false); expect(review.accepted).toBe(false);
expect(review.evidence_convincing).toBe(false); expect(review.evidence_convincing).toBe(false);
}); });
it("renders one compact evidence packet for both human and robot review", () => { it("renders one compact evidence packet for both human and robot review", () => {
const task = makeTask({ const task = makeTask({
metadata: { metadata: {
lgtm_evidence: "literal output", lgtm_evidence: "literal output",
lgtm_failure_likely: "wrong seed", lgtm_failure_likely: "wrong seed",
lgtm_failure_sneaky: "wrong threshold", lgtm_failure_sneaky: "wrong threshold",
lgtm_failure_unknown: "does not test UI rendering", lgtm_failure_unknown: "does not test UI rendering",
lgtm_falsification_test: "pytest -k check\nPASSED", lgtm_falsification_test: "pytest -k check\nPASSED",
lgtm_evidence_reasoning: "The passing pytest transcript distinguishes success from wrong-threshold and wrong-seed failures for this test scope.", lgtm_evidence_reasoning:
lgtm_verification_hints: ["test/robot-review.test.ts contains the new guard test"], "The passing pytest transcript distinguishes success from wrong-threshold and wrong-seed failures for this test scope.",
lgtm_remaining_uncertainty: "not load tested", lgtm_verification_hints: [
lgtm_submitted_at: "2026-06-14T00:00:00.000Z", "test/robot-review.test.ts contains the new guard test",
lgtm_commands: [{ cmd: "npm test", exit_code: 0, stdout_path: "/tmp/test.log" }], ],
lgtm_evidence_artifacts: [{ path: "/tmp/test.log", sha256: "abc", bytes: 123 }], lgtm_remaining_uncertainty: "not load tested",
}, lgtm_submitted_at: "2026-06-14T00:00:00.000Z",
}); lgtm_commands: [
{ cmd: "npm test", exit_code: 0, stdout_path: "/tmp/test.log" },
],
lgtm_evidence_artifacts: [
{ path: "/tmp/test.log", sha256: "abc", bytes: 123 },
],
},
});
const packet = renderEvidencePacket(task); const packet = renderEvidencePacket(task);
const prompt = buildRobotReviewPrompt(task); const prompt = buildRobotReviewPrompt(task);
expect(packet).toContain("## Goal"); expect(packet).toContain("## Goal");
expect(packet).toContain("## Planned evidence / UAT"); expect(packet).toContain("## Attempt 1");
expect(packet).toContain("## Attempt 1"); expect(packet).toContain("### Evidence");
expect(prompt).toContain(packet); expect(packet).toContain("### Verify");
expect(prompt).toContain("does this evidence prove success for the stated goal"); expect(prompt).toContain(packet);
}); expect(prompt).toContain(
"does this packet prove the exact user-visible success condition",
);
expect(prompt).toContain(
"Do not reject solely because items 3, 4, or 5 are weak",
);
expect(prompt).toContain(
"concrete missing artifacts, command outputs, written-file checks",
);
});
it("appends robot reviews as iterations", () => { it("truncates long submitted evidence in the rendered proof log and points to the full artifact", () => {
const task = makeTask(); const longEvidence = Array.from(
const metadata1 = appendRobotReviewMetadata(task, { { length: 35 },
reviewer: "opencode", (_, i) => `line ${i + 1}`,
scope: "task evidence", ).join("\n");
observations: ["Observed missing benchmark output"], const task = makeTask({
concerns: ["The current evidence does not show the claimed speedup."], metadata: {
suggestions: ["Add the benchmark transcript for the claimed speedup."], lgtm_evidence: longEvidence,
blind_spots: "Did not inspect prod config", lgtm_failure_likely: "wrong seed",
accepted: false, lgtm_failure_sneaky: "wrong threshold",
evidence_complete: false, lgtm_failure_unknown: "untested environment",
evidence_convincing: false, lgtm_falsification_test: "pytest -k check\nPASSED",
missing_evidence: ["Benchmark output for the claimed speedup"], lgtm_evidence_reasoning:
submitted_at: "2026-04-17T00:00:00.000Z", "The transcript rules out the named failures for this scope.",
mode: "auto", lgtm_verification_hints: ["see /tmp/test.log"],
}); lgtm_remaining_uncertainty: "not load tested",
const task1 = makeTask({ metadata: metadata1 }); lgtm_submitted_at: "2026-06-14T00:00:00.000Z",
const metadata2 = appendRobotReviewMetadata(task1, { lgtm_evidence_artifacts: [
reviewer: "opencode", { path: "/tmp/test.log", sha256: "abc", bytes: 123 },
scope: "updated task evidence", ],
observations: ["Observed benchmark output and test transcript"], },
concerns: [], });
suggestions: [],
blind_spots: "Did not inspect long-run stability",
accepted: true,
evidence_complete: true,
evidence_convincing: true,
missing_evidence: [],
submitted_at: "2026-04-17T01:00:00.000Z",
mode: "auto",
});
const task2 = makeTask({ metadata: metadata2 }); const log = renderProofLog(task);
const reviews = getRobotReviews(task2); expect(log).toContain("line 1");
expect(reviews).toHaveLength(2); expect(log).toContain("line 8");
expect(reviews[0].iteration).toBe(1); expect(log).toContain("line 35");
expect(reviews[1].iteration).toBe(2); expect(log).not.toContain("line 9");
expect(getLatestRobotReview(task2)?.evidence_convincing).toBe(true); expect(log).toContain("[... 19 middle lines omitted ...]");
expect(task2.metadata.robot_review_iteration_count).toBe(2); expect(log).toContain(
}); "[truncated at 16 lines from 35; showing first 8 and last 8; full text: /tmp/test.log]",
);
});
it("renders a simple proof log with judgement and suggestions", () => { it("appends robot reviews as iterations", () => {
const taskWithEvidence = makeTask({ const task = makeTask();
metadata: { const metadata1 = appendRobotReviewMetadata(task, {
lgtm_evidence: "npm test\n125 passed", reviewer: "opencode",
lgtm_failure_likely: "old package name still in README", scope: "task evidence",
lgtm_failure_sneaky: "top-level direct completion still slips through", observations: ["Observed missing benchmark output"],
lgtm_failure_unknown: "fresh judge command fails in a real session", concerns: ["The current evidence does not show the claimed speedup."],
lgtm_falsification_test: "npm test\n125 passed", suggestions: ["Add the benchmark transcript for the claimed speedup."],
lgtm_evidence_reasoning: "The test transcript and grep distinguish the intended behavior from stale workflow regressions.", blind_spots: "Did not inspect prod config",
lgtm_verification_hints: ["README.md install block shows pi-proof-tasks"], accepted: false,
lgtm_remaining_uncertainty: "Did not exercise every model provider.", evidence_complete: false,
lgtm_submitted_at: "2026-06-14T00:00:00.000Z", evidence_convincing: false,
}, missing_evidence: ["Benchmark output for the claimed speedup"],
}); submitted_at: "2026-04-17T00:00:00.000Z",
const task = makeTask({ mode: "auto",
metadata: { });
...taskWithEvidence.metadata, const task1 = makeTask({ metadata: metadata1 });
...appendRobotReviewMetadata(taskWithEvidence, { const metadata2 = appendRobotReviewMetadata(task1, {
reviewer: "auto", reviewer: "opencode",
scope: "proof log", scope: "updated task evidence",
observations: ["Observed the test transcript and renamed package."], observations: ["Observed benchmark output and test transcript"],
concerns: ["The live Pi session path is still untested."], concerns: [],
suggestions: ["Run one self-hosted TaskClaimDone UAT."], suggestions: [],
blind_spots: "Did not inspect external auth state", blind_spots: "Did not inspect long-run stability",
accepted: false, accepted: true,
evidence_complete: true, evidence_complete: true,
evidence_convincing: false, evidence_convincing: true,
missing_evidence: ["self-hosted TaskClaimDone UAT"], missing_evidence: [],
submitted_at: "2026-06-14T00:01:00.000Z", submitted_at: "2026-04-17T01:00:00.000Z",
mode: "auto", mode: "auto",
}), });
},
});
const log = renderProofLog(task); const task2 = makeTask({ metadata: metadata2 });
expect(log).toContain("# Task #1: Test"); const reviews = getRobotReviews(task2);
expect(log).toContain("## Goal"); expect(reviews).toHaveLength(2);
expect(log).toContain("## Planned evidence / UAT"); expect(reviews[0].iteration).toBe(1);
expect(log).toContain("## Attempt 1"); expect(reviews[1].iteration).toBe(2);
expect(log).toContain("### Submitted evidence"); expect(getLatestRobotReview(task2)?.evidence_convincing).toBe(true);
expect(log).toContain("### Judgement"); expect(task2.metadata.robot_review_iteration_count).toBe(2);
expect(log).toContain("Refused by auto"); });
expect(log).toContain("Run one self-hosted TaskClaimDone UAT.");
});
it("renders reviewer-unavailable proof logs for fail-open completion notes", () => { it("renders a simple proof log with judgement and suggestions", () => {
const task = makeTask({ const taskWithEvidence = makeTask({
status: "completed", metadata: {
metadata: { lgtm_evidence: "npm test\n125 passed",
lgtm_evidence: "npm test\n125 passed", lgtm_failure_likely: "old package name still in README",
lgtm_failure_likely: "old package name still in README", lgtm_failure_sneaky: "top-level direct completion still slips through",
lgtm_failure_sneaky: "top-level direct completion still slips through", lgtm_failure_unknown: "fresh judge command fails in a real session",
lgtm_failure_unknown: "fresh judge command fails in a real session", lgtm_falsification_test: "npm test\n125 passed",
lgtm_falsification_test: "npm test\n125 passed", lgtm_evidence_reasoning:
lgtm_evidence_reasoning: "The test transcript and grep distinguish the intended behavior from stale workflow regressions.", "The test transcript and grep distinguish the intended behavior from stale workflow regressions.",
lgtm_verification_hints: ["README.md install block shows pi-proof-tasks"], lgtm_verification_hints: [
lgtm_remaining_uncertainty: "Did not exercise every model provider.", "README.md install block shows pi-proof-tasks",
robot_review_last_error: "judge auth failed", ],
}, lgtm_remaining_uncertainty: "Did not exercise every model provider.",
}); lgtm_submitted_at: "2026-06-14T00:00:00.000Z",
},
});
const task = makeTask({
metadata: {
...taskWithEvidence.metadata,
...appendRobotReviewMetadata(taskWithEvidence, {
reviewer: "auto",
scope: "proof log",
observations: ["Observed the test transcript and renamed package."],
concerns: ["The live Pi session path is still untested."],
suggestions: ["Run one self-hosted TaskClaimDone UAT."],
blind_spots: "Did not inspect external auth state",
accepted: false,
evidence_complete: true,
evidence_convincing: false,
missing_evidence: ["self-hosted TaskClaimDone UAT"],
submitted_at: "2026-06-14T00:01:00.000Z",
mode: "auto",
}),
},
});
const log = renderProofLog(task); const log = renderProofLog(task);
expect(log).toContain("completed with reviewer unavailable"); expect(log).toContain("# Task #1: Test");
expect(log).toContain("### Judgement"); expect(log).toContain("## Goal");
expect(log).toContain("judge auth failed"); expect(log).toContain("## Attempt 1");
expect(log).toContain("Autonomy continued without blocking completion."); expect(log).toContain("### Evidence");
}); expect(log).toContain("### Verify");
expect(log).toContain("### Judgement");
expect(log).toContain("Refused by auto");
expect(log).toContain("### Observations");
expect(log).toContain("### Concerns");
expect(log).toContain("### Missing evidence");
expect(log).toContain("### Suggestions");
expect(log).toContain("Run one self-hosted TaskClaimDone UAT.");
});
it("keeps full submitted evidence in the automatic review packet even when proof logs truncate it", () => {
  // Build 35 numbered evidence lines: "line 1" through "line 35".
  const numbered: string[] = [];
  for (let n = 1; n <= 35; n += 1) {
    numbered.push(`line ${n}`);
  }
  const longEvidence = numbered.join("\n");

  // The same text is written to disk and attached to the task as an artifact.
  const artifactPath = join(tmpdir(), "proof-packet-long-evidence.log");
  writeFileSync(artifactPath, longEvidence);

  const task = makeTask({
    metadata: {
      lgtm_evidence: longEvidence,
      lgtm_failure_likely: "missing artifact",
      lgtm_failure_sneaky: "wrong slice shown",
      lgtm_failure_unknown: "untested provider path",
      lgtm_falsification_test: "npm test\npass",
      lgtm_evidence_reasoning:
        "The full evidence must stay visible to the judge even if humans see a shortened preview.",
      lgtm_verification_hints: [
        "Open the artifact if the inline preview truncates.",
      ],
      lgtm_remaining_uncertainty: "Did not inspect live TUI.",
      lgtm_evidence_artifacts: buildArtifactRecords([artifactPath]),
    },
  });

  const proofLog = renderProofLog(task);
  const reviewPacket = renderEvidencePacket(task, { truncateEvidence: false });

  // Proof log: line 8 and line 35 are shown, line 9 is not.
  for (const shown of ["line 8", "line 35"]) {
    expect(proofLog).toContain(shown);
  }
  expect(proofLog).not.toContain("line 9");

  // Packet rendered with truncateEvidence: false keeps the tail and carries no truncation marker.
  expect(reviewPacket).toContain("line 35");
  expect(reviewPacket).not.toContain("[truncated at 16 lines");
});
it("renders reviewer-unavailable proof logs for fail-open completion notes", () => {
  // The same transcript is used as both the evidence and the falsification test.
  const transcript = "npm test\n125 passed";
  const task = makeTask({
    status: "completed",
    metadata: {
      lgtm_evidence: transcript,
      lgtm_failure_likely: "old package name still in README",
      lgtm_failure_sneaky: "top-level direct completion still slips through",
      lgtm_failure_unknown: "fresh judge command fails in a real session",
      lgtm_falsification_test: transcript,
      lgtm_evidence_reasoning:
        "The test transcript and grep distinguish the intended behavior from stale workflow regressions.",
      lgtm_verification_hints: [
        "README.md install block shows pi-proof-tasks",
      ],
      lgtm_remaining_uncertainty: "Did not exercise every model provider.",
      // Only a reviewer error is recorded on this task; no review metadata is appended.
      robot_review_last_error: "judge auth failed",
    },
  });

  const log = renderProofLog(task);

  const mustAppear = [
    "completed with reviewer unavailable",
    "### Judgement",
    "judge auth failed",
    "### Suggestions",
    "Autonomy continued without blocking completion.",
  ];
  const mustBeAbsent = [
    "### Missing evidence",
    "### Observations",
    "### Concerns",
  ];
  for (const fragment of mustAppear) {
    expect(log).toContain(fragment);
  }
  for (const heading of mustBeAbsent) {
    expect(log).not.toContain(heading);
  }
});
}); });
+190
View File
@@ -0,0 +1,190 @@
import { chmodSync, mkdtempSync, writeFileSync } from "node:fs";
import { tmpdir } from "node:os";
import { join } from "node:path";
import { afterEach, describe, expect, it, vi } from "vitest";
import proofTasksExtension from "../src/index.js";
/**
 * Minimal shape of a tool object as captured by the fake `registerTool`
 * hook in this test file.
 */
interface RegisteredTool {
  /** Key the tool is stored under in the harness's lookup map. */
  name: string;
  /** Tool entry point; the harness calls it with positional arguments and returns its promise. */
  execute: (...args: any[]) => Promise<any>;
}
/**
 * Builds a fake `pi` host made of vitest mocks, runs the proof-tasks
 * extension against it, and returns a helper for invoking the tools the
 * extension registered.
 *
 * @returns An object whose `execTool(name, params, ctx?)` looks the tool up by
 *   name and calls its `execute`; it throws if no tool with that name was registered.
 */
function makeHarness() {
  const registry = new Map<string, RegisteredTool>();
  const captureTool = (tool: RegisteredTool) => registry.set(tool.name, tool);

  const pi = {
    on: vi.fn(),
    registerTool: vi.fn(captureTool),
    registerCommand: vi.fn(),
    sendMessage: vi.fn(),
  };
  proofTasksExtension(pi as any);

  const execTool = async (
    name: string,
    params: Record<string, unknown>,
    ctx: Record<string, unknown> = {},
  ) => {
    const registered = registry.get(name);
    if (!registered) {
      throw new Error(`Tool ${name} not registered`);
    }
    // Fixed call id, then params; the two middle arguments are left undefined; ctx goes last.
    return registered.execute("tool-call", params, undefined, undefined, ctx);
  };

  return { execTool };
}
/**
 * Writes `source` to `reviewer.js` inside a freshly created temp directory,
 * prefixed with a Node shebang line, marks it executable (0o755), and
 * returns the script's path.
 *
 * @param source JavaScript placed after the shebang line.
 * @returns Absolute path of the written script.
 */
function writeReviewerScript(source: string): string {
  const scriptPath = join(
    mkdtempSync(join(tmpdir(), "pi-proof-reviewer-")),
    "reviewer.js",
  );
  // Shebang, the caller's source, then a trailing newline.
  const contents = ["#!/usr/bin/env node", source, ""].join("\n");
  writeFileSync(scriptPath, contents);
  chmodSync(scriptPath, 0o755);
  return scriptPath;
}
// Value of PI_PROOF_TASKS_PI_BIN when this module was loaded; the tests below overwrite it.
const ORIGINAL_PI_BIN = process.env.PI_PROOF_TASKS_PI_BIN;

// After every test, put the variable back: restore the saved value, or
// remove it entirely if it was not set at load time.
afterEach(() => {
  if (ORIGINAL_PI_BIN !== undefined) {
    process.env.PI_PROOF_TASKS_PI_BIN = ORIGINAL_PI_BIN;
    return;
  }
  delete process.env.PI_PROOF_TASKS_PI_BIN;
});
describe("TaskClaimDone end-to-end proof flow", () => {
it("keeps the task open on rejected review and /lgtm-style TaskGet shows truncated evidence", async () => {
  // Fake judge script: prints a rejecting review (accepted: false) between the
  // ROBOT_REVIEW_JSON_START / ROBOT_REVIEW_JSON_END markers.
  const reviewer = writeReviewerScript(`
const review = {
reviewer: "fake-judge",
scope: "task evidence",
rubric: {
evidence_covers_done_criterion: { reason: "missing one artifact", pass: false },
falsification_test_runnable: { reason: "ok", pass: true },
failure_modes_addressed: { reason: "ok", pass: true },
evidence_distinguishes_success: { reason: "not enough", pass: false },
verification_hints_actionable: { reason: "ok", pass: true }
},
observations: ["Observed truncated proof packet"],
concerns: ["Need stronger evidence"],
suggestions: ["Add one more artifact"],
blind_spots: "Did not inspect live TUI",
missing_evidence: ["evidence_covers_done_criterion", "evidence_distinguishes_success"],
evidence_complete: false,
evidence_convincing: false,
accepted: false
};
console.log("ROBOT_REVIEW_JSON_START");
console.log(JSON.stringify(review));
console.log("ROBOT_REVIEW_JSON_END");
`);
  // NOTE(review): presumably the extension spawns this path as the judge — confirm in src.
  process.env.PI_PROOF_TASKS_PI_BIN = reviewer;

  const harness = makeHarness();
  await harness.execTool("TaskCreate", {
    subject: "Proof task",
    description: "Desc",
    done_criterion: "done",
  });

  // 35 numbered lines; the assertions below expect only the first 8 and last 8 to be shown.
  const longEvidence = Array.from(
    { length: 35 },
    (_, i) => `line ${i + 1}`,
  ).join("\n");
  // Write the artifact into a unique per-run directory rather than a fixed
  // filename directly under the shared OS temp dir, so concurrent runs (or
  // another user's leftover file) cannot collide with this test.
  const artifactPath = join(
    mkdtempSync(join(tmpdir(), "pi-proof-evidence-")),
    "proof-long-evidence.log",
  );
  writeFileSync(artifactPath, longEvidence);

  const claim = await harness.execTool(
    "TaskClaimDone",
    {
      taskId: "1",
      evidence: longEvidence,
      failure_likely: "missing artifact",
      failure_sneaky: "right shape for wrong reason",
      failure_unknown: "untested provider path",
      falsification_test: "npm test\npass",
      evidence_reasoning:
        "The packet distinguishes the named failures for this test scope.",
      verification_hints: ["look at the proof log"],
      remaining_uncertainty: "Did not inspect live TUI",
      evidence_paths: [artifactPath],
    },
    { model: { provider: "openai", id: "gpt-5" } },
  );
  const claimText = claim.content[0].text;
  const taskGet = await harness.execTool("TaskGet", { taskId: "1" });
  const text = taskGet.content[0].text;

  // TaskClaimDone output: header plus metadata counters.
  expect(claimText).toContain("## TaskClaimDone -> Task #1: Proof task");
  expect(claimText).toContain("### Metadata");
  expect(claimText).toContain("- Proof iterations: 1");
  expect(claimText).toContain("- Robot reviews: 1");

  // The rejected review leaves the task pending.
  expect(text).toContain("Status: pending");
  expect(text).toContain(
    "Gate status: latest proof review rejected the evidence; strengthen the proof and try again",
  );

  // Evidence preview: head and tail shown, "line 9" omitted, full text referenced by path.
  expect(text).toContain("line 1");
  expect(text).toContain("line 8");
  expect(text).toContain("line 35");
  expect(text).not.toContain("line 9");
  expect(text).toContain("[... 19 middle lines omitted ...]");
  expect(text).toContain(
    `[truncated at 16 lines from 35; showing first 8 and last 8; full text: ${artifactPath}]`,
  );

  // Judgement sections taken from the fake review.
  expect(text).toContain("### Judgement");
  expect(text).toContain("Refused");
  expect(text).toContain("### Missing evidence");
  expect(text).toContain("### Suggestions");
  expect(text).toContain("Add one more artifact");
});
it("completes the task fail-open on parse failure and preserves the failure note", async () => {
  // Fake judge that prints both markers on one line with non-JSON text between them.
  const brokenReviewer = writeReviewerScript(`
console.log("ROBOT_REVIEW_JSON_START and nope ROBOT_REVIEW_JSON_END");
`);
  process.env.PI_PROOF_TASKS_PI_BIN = brokenReviewer;

  const { execTool } = makeHarness();
  await execTool("TaskCreate", {
    subject: "Proof task",
    description: "Desc",
    done_criterion: "done",
  });

  const claimParams = {
    taskId: "1",
    evidence: "short evidence",
    failure_likely: "missing artifact",
    failure_sneaky: "right shape for wrong reason",
    failure_unknown: "untested provider path",
    falsification_test: "npm test\npass",
    evidence_reasoning:
      "The packet distinguishes the named failures for this test scope.",
    verification_hints: ["look at the proof log"],
    remaining_uncertainty: "Did not inspect live TUI",
  };
  const callCtx = { model: { provider: "openai", id: "gpt-5" } };
  const claim = await execTool("TaskClaimDone", claimParams, callCtx);
  const claimText = claim.content[0].text;
  const fetched = await execTool("TaskGet", { taskId: "1" });
  const text = fetched.content[0].text;

  // TaskClaimDone output reports the fail-open gate status.
  const claimFragments = [
    "## TaskClaimDone -> Task #1: Proof task",
    "### Metadata",
    "- Gate status: completed with reviewer unavailable",
  ];
  for (const fragment of claimFragments) {
    expect(claimText).toContain(fragment);
  }

  // TaskGet output: the task is completed and the raw judge output is preserved.
  const presentInTask = [
    "Status: completed",
    "completed with reviewer unavailable",
    "Raw output:",
    "### Suggestions",
    "ROBOT_REVIEW_JSON_START and nope ROBOT_REVIEW_JSON_END",
    "Autonomy continued without blocking completion.",
  ];
  for (const fragment of presentInTask) {
    expect(text).toContain(fragment);
  }

  // These sections must not be rendered as "(none)" placeholders.
  const emptySections = [
    "### Missing evidence\n- (none)",
    "### Observations\n- (none)",
    "### Concerns\n- (none)",
  ];
  for (const placeholder of emptySections) {
    expect(text).not.toContain(placeholder);
  }
});
});
+299 -86
View File
@@ -1,99 +1,312 @@
import { describe, expect, it, vi } from "vitest"; import { mkdtempSync, rmSync } from "node:fs";
import { tmpdir } from "node:os";
import { join } from "node:path";
import { afterEach, describe, expect, it, vi } from "vitest";
import proofTasksExtension from "../src/index.js"; import proofTasksExtension from "../src/index.js";
import { TaskStore } from "../src/task-store.js";
type RegisteredTool = { type RegisteredTool = {
name: string; name: string;
execute: (...args: any[]) => Promise<any>; execute: (...args: any[]) => Promise<any>;
}; };
function makeHarness() { function makeHarness() {
const tools = new Map<string, RegisteredTool>(); const tools = new Map<string, RegisteredTool>();
const pi = { const handlers = new Map<string, Array<(...args: any[]) => any>>();
on: vi.fn(), const pi = {
registerTool: vi.fn((tool: RegisteredTool) => tools.set(tool.name, tool)), on: vi.fn((event: string, handler: (...args: any[]) => any) => {
registerCommand: vi.fn(), const existing = handlers.get(event) ?? [];
sendMessage: vi.fn(), existing.push(handler);
}; handlers.set(event, existing);
}),
registerTool: vi.fn((tool: RegisteredTool) => tools.set(tool.name, tool)),
registerCommand: vi.fn(),
sendMessage: vi.fn(),
};
proofTasksExtension(pi as any); proofTasksExtension(pi as any);
async function execTool(name: string, params: Record<string, unknown>) { async function execTool(name: string, params: Record<string, unknown>) {
const tool = tools.get(name); const tool = tools.get(name);
if (!tool) throw new Error(`Tool ${name} not registered`); if (!tool) throw new Error(`Tool ${name} not registered`);
return tool.execute("tool-call", params, undefined, undefined, {}); return tool.execute("tool-call", params, undefined, undefined, {});
} }
return { execTool }; async function trigger(event: string, payload: any = {}, ctx: any = {}) {
for (const handler of handlers.get(event) ?? []) {
await handler(payload, ctx);
}
}
return { execTool, trigger };
} }
describe("TaskList", () => { const tempDirs: string[] = [];
it("renders a compact one-line-per-task summary", async () => {
const harness = makeHarness();
await harness.execTool("TaskCreate", {
subject: "Design the flux capacitor",
description: "Desc",
done_criterion: "done",
});
await harness.execTool("TaskCreate", {
subject: "Acquiring plutonium",
description: "Desc",
done_criterion: "done",
progress_label: "Acquiring plutonium",
});
await harness.execTool("TaskCreate", {
subject: "Install flux capacitor in DeLorean",
description: "Desc",
done_criterion: "done",
parentId: "1",
});
await harness.execTool("TaskCreate", {
subject: "Test time travel at 88 mph",
description: "Desc",
done_criterion: "done",
});
await harness.execTool("TaskUpdate", { taskId: "1", status: "completed" }); afterEach(() => {
await harness.execTool("TaskUpdate", { taskId: "2", status: "in_progress" }); delete process.env.PI_TASKS;
await harness.execTool("TaskUpdate", { taskId: "3", add_blocked_by: ["1"] }); while (tempDirs.length > 0)
await harness.execTool("TaskUpdate", { taskId: "4", add_blocked_by: ["2", "3"] }); rmSync(tempDirs.pop()!, { recursive: true, force: true });
});
const result = await harness.execTool("TaskList", {});
const text = result.content[0].text; describe("Task tools", () => {
it("renders a compact one-line-per-task summary", async () => {
expect(text).toContain("● 4 tasks (1 in progress, 3 open)"); const harness = makeHarness();
expect(text).toContain("◻ #1 Design the flux capacitor"); await harness.execTool("TaskCreate", {
expect(text).toContain("◼ #2 Acquiring plutonium"); subject: "Design the flux capacitor",
expect(text).toContain("◻ #3 Install flux capacitor in DeLorean subtask of #1 blocked by #1"); description: "Desc",
expect(text).toContain("◻ #4 Test time travel at 88 mph blocked by #2, #3"); done_criterion: "done",
expect(text).not.toContain("[ACTIVE]"); });
expect(text).not.toContain("[PENDING]"); await harness.execTool("TaskCreate", {
expect(text).not.toContain("[DONE"); subject: "Acquiring plutonium",
expect(text).not.toContain("🛠"); description: "Desc",
expect(text).not.toContain("test:"); done_criterion: "done",
}); progress_label: "Acquiring plutonium",
});
it("shows completed subtasks without proof-lane clutter", async () => { await harness.execTool("TaskCreate", {
const harness = makeHarness(); subject: "Install flux capacitor in DeLorean",
await harness.execTool("TaskCreate", { description: "Desc",
subject: "Top-level goal", done_criterion: "done",
description: "Desc", parentId: "1",
done_criterion: "done", });
}); await harness.execTool("TaskCreate", {
await harness.execTool("TaskCreate", { subject: "Test time travel at 88 mph",
subject: "Finished checklist item", description: "Desc",
description: "Desc", done_criterion: "done",
done_criterion: "done", });
parentId: "1",
}); await harness.execTool("TaskUpdate", { taskId: "1", status: "completed" });
await harness.execTool("TaskUpdate", {
await harness.execTool("TaskUpdate", { taskId: "2", status: "completed" }); taskId: "2",
status: "in_progress",
const result = await harness.execTool("TaskList", {}); });
const text = result.content[0].text; await harness.execTool("TaskUpdate", {
taskId: "3",
expect(text).toContain("● 2 tasks (1 done, 1 open)"); add_blocked_by: ["1"],
expect(text).toContain("✔ #2 Finished checklist item subtask of #1"); });
expect(text).not.toContain("[DONE"); await harness.execTool("TaskUpdate", {
expect(text).not.toContain("🛠"); taskId: "4",
}); add_blocked_by: ["2", "3"],
});
const result = await harness.execTool("TaskList", {});
const text = result.content[0].text;
expect(text).toContain("● 4 goals (1 in progress, 3 open)");
expect(text).toContain("◻ #1 Design the flux capacitor");
expect(text).toContain("◼ #2 Acquiring plutonium");
expect(text).toContain(
"◻ #3 Install flux capacitor in DeLorean subtask of #1 blocked by #1",
);
expect(text).toContain(
"◻ #4 Test time travel at 88 mph blocked by #2, #3",
);
expect(text).not.toContain("[ACTIVE]");
expect(text).not.toContain("[PENDING]");
expect(text).not.toContain("[DONE");
expect(text).not.toContain("proof claim submitted");
expect(text).not.toContain("test:");
});
it("shows TaskCreate output with metadata and compact previews", async () => {
  const { execTool } = makeHarness();

  // Multi-line description and done criterion, plus a progress label and two metadata keys.
  const created = await execTool("TaskCreate", {
    subject: "Top-level goal",
    description: "Line 1\nLine 2\nLine 3",
    done_criterion: "observe line a\nobserve line b",
    progress_label: "Running check",
    metadata: { owner: "pi", note: "short" },
  });
  const text = created.content[0].text;

  const expectedFragments = [
    "## TaskCreate -> Task #1: Top-level goal",
    "### Metadata",
    "- Metadata keys: 2",
    "### Done criterion",
    "### Description",
    "### Progress label",
    "### Metadata preview",
  ];
  for (const fragment of expectedFragments) {
    expect(text).toContain(fragment);
  }
});
it("shows TaskUpdate output with changed fields and previews", async () => {
  const { execTool } = makeHarness();
  await execTool("TaskCreate", {
    subject: "Top-level goal",
    description: "Desc",
    done_criterion: "done",
  });

  // Change three fields in a single update so each shows up in the summary.
  const updated = await execTool("TaskUpdate", {
    taskId: "1",
    status: "in_progress",
    progress_label: "Running check",
    metadata: { owner: "pi" },
  });
  const text = updated.content[0].text;

  const expectedFragments = [
    "## TaskUpdate -> Task #1: Top-level goal",
    "- Updated fields: status, progress_label, metadata",
    "- status: pending -> in_progress",
    "- progress_label: (missing) -> Running check",
    "### Metadata patch",
  ];
  for (const fragment of expectedFragments) {
    expect(text).toContain(fragment);
  }
});
it("shows completed subtasks without proof-lane clutter", async () => {
  const { execTool } = makeHarness();

  // Task #1 is a top-level goal; task #2 is a subtask of #1 that is then completed directly.
  await execTool("TaskCreate", {
    subject: "Top-level goal",
    description: "Desc",
    done_criterion: "done",
  });
  await execTool("TaskCreate", {
    subject: "Finished checklist item",
    description: "Desc",
    done_criterion: "done",
    parentId: "1",
  });
  await execTool("TaskUpdate", { taskId: "2", status: "completed" });

  const listing = await execTool("TaskList", {});
  const text = listing.content[0].text;

  // The completed subtask is counted in the summary but its row is not listed.
  expect(text).toContain("● 2 goals (1 done hidden, 1 open)");
  expect(text).toContain("◻ #1 Top-level goal");
  const forbidden = [
    "#2 Finished checklist item",
    "[DONE",
    "proof claim submitted",
  ];
  for (const fragment of forbidden) {
    expect(text).not.toContain(fragment);
  }
});
it("keeps persisted completed tasks on startup but hides them from the collapsed list", async () => {
  // Temp directory is recorded in tempDirs so the file-level afterEach removes it.
  const workDir = mkdtempSync(join(tmpdir(), "pi-proof-tasks-"));
  tempDirs.push(workDir);
  const storePath = join(workDir, "tasks.json");
  process.env.PI_TASKS = storePath;

  // Seed the store file with a single, already completed task before the extension loads.
  const seedStore = new TaskStore(storePath);
  seedStore.create("Finished work", "Desc", "done");
  seedStore.complete("1");

  const harness = makeHarness();
  const startupCtx = {
    ui: { setWidget() {}, setStatus() {} },
    sessionManager: { getSessionId: () => "session-test" },
  };
  await harness.trigger("before_agent_start", {}, startupCtx);

  const listing = await harness.execTool("TaskList", {});
  const listingText = listing.content[0].text;
  expect(listingText).toContain("● 1 goals (1 done hidden)");
  expect(listingText).toContain(
    "No open tasks. Completed tasks are hidden by default.",
  );

  // Re-read the file from disk: the completed task must still be stored.
  const onDisk = new TaskStore(storePath);
  expect(onDisk.get("1")?.status).toBe("completed");
});
it("keeps persisted completed tasks on startup even when one open goal remains", async () => {
  // Temp directory is recorded in tempDirs so the file-level afterEach removes it.
  const workDir = mkdtempSync(join(tmpdir(), "pi-proof-tasks-"));
  tempDirs.push(workDir);
  const storePath = join(workDir, "tasks.json");
  process.env.PI_TASKS = storePath;

  // Seed an open goal (#1) and a completed task (#2) created with "1" as its last argument.
  const seedStore = new TaskStore(storePath);
  seedStore.create("Open goal", "Desc", "done");
  seedStore.create("Finished work", "Desc", "done", undefined, undefined, "1");
  seedStore.complete("2");

  const harness = makeHarness();
  const startupCtx = {
    ui: { setWidget() {}, setStatus() {} },
    sessionManager: { getSessionId: () => "session-test" },
  };
  await harness.trigger("before_agent_start", {}, startupCtx);

  const listing = await harness.execTool("TaskList", {});
  const text = listing.content[0].text;
  expect(text).toContain("● 2 goals (1 done hidden, 1 open)");
  expect(text).toContain("◻ #1 Open goal");
  expect(text).not.toContain("Finished work");

  // Re-read the file from disk: the completed task must still be stored.
  const onDisk = new TaskStore(storePath);
  expect(onDisk.get("2")?.status).toBe("completed");
});
it("keeps completed tasks persisted by default across later turns", async () => {
  // Temp directory is recorded in tempDirs so the file-level afterEach removes it.
  const workDir = mkdtempSync(join(tmpdir(), "pi-proof-tasks-"));
  tempDirs.push(workDir);
  const storePath = join(workDir, "tasks.json");
  process.env.PI_TASKS = storePath;

  const harness = makeHarness();
  await harness.execTool("TaskCreate", {
    subject: "Persistent completed goal",
    description: "Desc",
    done_criterion: "done",
  });
  await harness.execTool("TaskCreate", {
    subject: "Checklist item",
    description: "Desc",
    done_criterion: "done",
    parentId: "1",
  });
  await harness.execTool("TaskUpdate", { taskId: "2", status: "completed" });

  // A fresh context object is built for every simulated turn.
  const makeTurnCtx = () => ({
    ui: { setWidget() {}, setStatus() {} },
    sessionManager: { getSessionId: () => "session-test" },
  });
  let remainingTurns = 8;
  while (remainingTurns > 0) {
    await harness.trigger("turn_start", {}, makeTurnCtx());
    remainingTurns -= 1;
  }

  // After eight turn_start events the completed subtask is still in the file.
  const onDisk = new TaskStore(storePath);
  expect(onDisk.get("2")?.status).toBe("completed");
});
it("stores named PI_TASKS lists inside the repo .pi/tasks directory", async () => {
  // A bare name (not a path) in PI_TASKS; the timestamp keeps it unique per run.
  const listName = `named-${Date.now()}`;
  process.env.PI_TASKS = listName;
  const expectedPath = join(process.cwd(), ".pi", "tasks", `${listName}.json`);

  // Removes the store file and its ".lock" / ".tmp" siblings.
  // NOTE(review): presumably lock/temp files written by TaskStore — confirm in src.
  // `force` makes a missing path a no-op; any other failure now surfaces
  // instead of being swallowed by an empty catch.
  const removeStoreFiles = (): void => {
    for (const suffix of ["", ".lock", ".tmp"]) {
      rmSync(expectedPath + suffix, { recursive: true, force: true });
    }
  };

  removeStoreFiles();
  try {
    const harness = makeHarness();
    await harness.execTool("TaskCreate", {
      subject: "Repo local task",
      description: "Desc",
      done_criterion: "done",
    });

    // Reading the expected path directly proves the named list was stored there.
    const reloaded = new TaskStore(expectedPath);
    expect(reloaded.get("1")?.subject).toBe("Repo local task");
  } finally {
    // Runs even when an assertion above throws, so a failing run does not
    // leave files behind in the repository's .pi/tasks directory.
    removeStoreFiles();
  }
});
}); });
+496 -433
View File
@@ -1,467 +1,530 @@
import { readFileSync, rmSync } from "node:fs"; import { readFileSync, rmSync } from "node:fs";
import { homedir, tmpdir } from "node:os"; import { tmpdir } from "node:os";
import { join } from "node:path"; import { join } from "node:path";
import { afterEach, beforeEach, describe, expect, it } from "vitest"; import { afterEach, beforeEach, describe, expect, it } from "vitest";
import { TaskStore } from "../src/task-store.js"; import { TaskStore } from "../src/task-store.js";
// Helper: create a subtask, which can be ticked off directly. // Helper: create a subtask, which can be ticked off directly.
function createSubtask(store: TaskStore, subject: string) { function createSubtask(store: TaskStore, subject: string) {
const parent = store.create(`${subject} parent`, "Desc", "done criterion"); const parent = store.create(`${subject} parent`, "Desc", "done criterion");
return store.create(subject, "Desc", "done criterion", undefined, undefined, parent.id); return store.create(
subject,
"Desc",
"done criterion",
undefined,
undefined,
parent.id,
);
} }
describe("TaskStore (in-memory)", () => { describe("TaskStore (in-memory)", () => {
let store: TaskStore; let store: TaskStore;
beforeEach(() => { beforeEach(() => {
store = new TaskStore(); // no listId = in-memory store = new TaskStore(); // no listId = in-memory
}); });
it("creates tasks with auto-incrementing IDs", () => { it("creates tasks with auto-incrementing IDs", () => {
const t1 = store.create("First task", "Description 1", "criterion 1"); const t1 = store.create("First task", "Description 1", "criterion 1");
const t2 = store.create("Second task", "Description 2", "criterion 2"); const t2 = store.create("Second task", "Description 2", "criterion 2");
expect(t1.id).toBe("1"); expect(t1.id).toBe("1");
expect(t2.id).toBe("2"); expect(t2.id).toBe("2");
expect(t1.status).toBe("pending"); expect(t1.status).toBe("pending");
expect(t1.subject).toBe("First task"); expect(t1.subject).toBe("First task");
expect(t1.description).toBe("Description 1"); expect(t1.description).toBe("Description 1");
expect(t1.done_criterion).toBe("criterion 1"); expect(t1.done_criterion).toBe("criterion 1");
}); });
it("creates tasks with optional fields", () => { it("creates tasks with optional fields", () => {
const t = store.create("Task", "Desc", "done criterion", "Running task", { key: "value" }); const t = store.create("Task", "Desc", "done criterion", "Running task", {
key: "value",
expect(t.progress_label).toBe("Running task"); });
expect(t.metadata).toEqual({ key: "value" });
}); expect(t.progress_label).toBe("Running task");
expect(t.metadata).toEqual({ key: "value" });
it("gets a task by ID", () => { });
store.create("Test", "Desc", "done");
const task = store.get("1"); it("gets a task by ID", () => {
store.create("Test", "Desc", "done");
expect(task).toBeDefined(); const task = store.get("1");
expect(task!.subject).toBe("Test");
}); expect(task).toBeDefined();
expect(task!.subject).toBe("Test");
it("returns undefined for non-existent task", () => { });
expect(store.get("999")).toBeUndefined();
}); it("returns undefined for non-existent task", () => {
expect(store.get("999")).toBeUndefined();
it("lists all tasks sorted by ID", () => { });
store.create("Task 3", "Desc", "done");
store.create("Task 1", "Desc", "done"); it("lists all tasks sorted by ID", () => {
store.create("Task 2", "Desc", "done"); store.create("Task 3", "Desc", "done");
store.create("Task 1", "Desc", "done");
const tasks = store.list(); store.create("Task 2", "Desc", "done");
expect(tasks.map(t => t.id)).toEqual(["1", "2", "3"]);
}); const tasks = store.list();
expect(tasks.map((t) => t.id)).toEqual(["1", "2", "3"]);
it("updates task status", () => { });
store.create("Test", "Desc", "done");
const { task, changedFields } = store.update("1", { status: "in_progress" }); it("updates task status", () => {
store.create("Test", "Desc", "done");
expect(task!.status).toBe("in_progress"); const { task, changedFields } = store.update("1", {
expect(changedFields).toEqual(["status"]); status: "in_progress",
}); });
it("updates multiple fields at once", () => { expect(task!.status).toBe("in_progress");
store.create("Test", "Desc", "done"); expect(changedFields).toEqual(["status"]);
const { changedFields } = store.update("1", { });
subject: "Updated subject",
description: "Updated desc", it("updates multiple fields at once", () => {
metadata: { owner: "agent-1" }, store.create("Test", "Desc", "done");
}); const { changedFields } = store.update("1", {
subject: "Updated subject",
expect(changedFields).toContain("subject"); description: "Updated desc",
expect(changedFields).toContain("description"); metadata: { owner: "agent-1" },
expect(changedFields).toContain("metadata"); });
const task = store.get("1")!; expect(changedFields).toContain("subject");
expect(task.subject).toBe("Updated subject"); expect(changedFields).toContain("description");
expect(task.metadata.owner).toBe("agent-1"); expect(changedFields).toContain("metadata");
});
const task = store.get("1")!;
it("deletes a task with status: deleted", () => { expect(task.subject).toBe("Updated subject");
store.create("Test", "Desc", "done"); expect(task.metadata.owner).toBe("agent-1");
const { changedFields } = store.update("1", { status: "deleted" }); });
expect(changedFields).toEqual(["deleted"]); it("deletes a task with status: deleted", () => {
expect(store.get("1")).toBeUndefined(); store.create("Test", "Desc", "done");
expect(store.list()).toHaveLength(0); const { changedFields } = store.update("1", { status: "deleted" });
});
expect(changedFields).toEqual(["deleted"]);
it("preserves ID counter after deletion", () => { expect(store.get("1")).toBeUndefined();
store.create("Task 1", "Desc", "done"); expect(store.list()).toHaveLength(0);
store.create("Task 2", "Desc", "done"); });
store.update("1", { status: "deleted" });
it("preserves ID counter after deletion", () => {
const t3 = store.create("Task 3", "Desc", "done"); store.create("Task 1", "Desc", "done");
expect(t3.id).toBe("3"); // Not "1" — counter continues store.create("Task 2", "Desc", "done");
}); store.update("1", { status: "deleted" });
it("merges metadata with null key deletion", () => { const t3 = store.create("Task 3", "Desc", "done");
store.create("Test", "Desc", "done", undefined, { a: 1, b: 2, c: 3 }); expect(t3.id).toBe("3"); // Not "1" — counter continues
store.update("1", { metadata: { b: null, d: 4 } }); });
const task = store.get("1")!; it("merges metadata with null key deletion", () => {
expect(task.metadata).toEqual({ a: 1, c: 3, d: 4 }); store.create("Test", "Desc", "done", undefined, { a: 1, b: 2, c: 3 });
}); store.update("1", { metadata: { b: null, d: 4 } });
it("sets up bidirectional blocks via add_blocks", () => { const task = store.get("1")!;
store.create("Blocker", "Desc", "done"); expect(task.metadata).toEqual({ a: 1, c: 3, d: 4 });
store.create("Blocked", "Desc", "done"); });
store.update("1", { add_blocks: ["2"] }); it("sets up bidirectional blocks via add_blocks", () => {
store.create("Blocker", "Desc", "done");
const t1 = store.get("1")!; store.create("Blocked", "Desc", "done");
const t2 = store.get("2")!;
expect(t1.blocks).toContain("2"); store.update("1", { add_blocks: ["2"] });
expect(t2.blockedBy).toContain("1");
}); const t1 = store.get("1")!;
const t2 = store.get("2")!;
it("sets up bidirectional blocks via add_blocked_by", () => { expect(t1.blocks).toContain("2");
store.create("Blocker", "Desc", "done"); expect(t2.blockedBy).toContain("1");
store.create("Blocked", "Desc", "done"); });
store.update("2", { add_blocked_by: ["1"] }); it("sets up bidirectional blocks via add_blocked_by", () => {
store.create("Blocker", "Desc", "done");
const t1 = store.get("1")!; store.create("Blocked", "Desc", "done");
const t2 = store.get("2")!;
expect(t1.blocks).toContain("2"); store.update("2", { add_blocked_by: ["1"] });
expect(t2.blockedBy).toContain("1");
}); const t1 = store.get("1")!;
const t2 = store.get("2")!;
it("does not duplicate dependency edges", () => { expect(t1.blocks).toContain("2");
store.create("A", "Desc", "done"); expect(t2.blockedBy).toContain("1");
store.create("B", "Desc", "done"); });
store.update("1", { add_blocks: ["2"] }); it("does not duplicate dependency edges", () => {
store.update("1", { add_blocks: ["2"] }); // duplicate store.create("A", "Desc", "done");
store.create("B", "Desc", "done");
const t1 = store.get("1")!;
expect(t1.blocks.filter(id => id === "2")).toHaveLength(1); store.update("1", { add_blocks: ["2"] });
}); store.update("1", { add_blocks: ["2"] }); // duplicate
it("cleans up dependency edges on deletion", () => { const t1 = store.get("1")!;
store.create("A", "Desc", "done"); expect(t1.blocks.filter((id) => id === "2")).toHaveLength(1);
store.create("B", "Desc", "done"); });
store.update("1", { add_blocks: ["2"] });
it("cleans up dependency edges on deletion", () => {
store.update("1", { status: "deleted" }); store.create("A", "Desc", "done");
store.create("B", "Desc", "done");
const t2 = store.get("2")!; store.update("1", { add_blocks: ["2"] });
expect(t2.blockedBy).toEqual([]);
}); store.update("1", { status: "deleted" });
it("clears completed tasks", () => { const t2 = store.get("2")!;
store.create("Completed", "Desc", "done"); expect(t2.blockedBy).toEqual([]);
store.create("Pending", "Desc", "done"); });
store.complete("1");
it("clears completed tasks", () => {
const count = store.clearCompleted(); store.create("Completed", "Desc", "done");
store.create("Pending", "Desc", "done");
expect(count).toBe(1); store.complete("1");
expect(store.list()).toHaveLength(1);
expect(store.list()[0].id).toBe("2"); const count = store.clearCompleted();
});
expect(count).toBe(1);
it("allows TaskUpdate(status=completed) for subtasks", () => { expect(store.list()).toHaveLength(1);
createSubtask(store, "Checklist item"); expect(store.list()[0].id).toBe("2");
const { task, changedFields } = store.update("2", { status: "completed" }); });
expect(task!.status).toBe("completed");
expect(changedFields).toContain("status"); it("allows TaskUpdate(status=completed) for subtasks", () => {
}); createSubtask(store, "Checklist item");
const { task, changedFields } = store.update("2", { status: "completed" });
it("blocks TaskUpdate(status=completed) for top-level tasks", () => { expect(task!.status).toBe("completed");
store.create("Goal", "Desc", "done"); expect(changedFields).toContain("status");
expect(() => store.update("1", { status: "completed" })).toThrow("Top-level task #1 requires proof"); });
});
it("blocks TaskUpdate(status=completed) for top-level tasks", () => {
it("keeps top-level completion gated even after proof evidence exists", () => { store.create("Goal", "Desc", "done");
store.create("Escalated", "Desc", "done"); expect(() => store.update("1", { status: "completed" })).toThrow(
store.update("1", { metadata: { lgtm_evidence: "literal output" } }); "Top-level task #1 requires proof",
expect(() => store.update("1", { status: "completed" })).toThrow("TaskClaimDone"); );
}); });
it("rejects changing parentId after creation", () => { it("keeps top-level completion gated even after proof evidence exists", () => {
store.create("Parent", "Desc", "done"); store.create("Escalated", "Desc", "done");
store.create("Child", "Desc", "done"); store.update("1", { metadata: { lgtm_evidence: "literal output" } });
expect(() => store.update("2", { parentId: "1" })).toThrow("parentId is creation-only"); expect(() => store.update("1", { status: "completed" })).toThrow(
}); "TaskClaimDone",
);
it("returns not found for update on non-existent task", () => { });
const { task, changedFields } = store.update("999", { status: "in_progress" });
expect(task).toBeUndefined(); it("rejects changing parentId after creation", () => {
expect(changedFields).toEqual([]); store.create("Parent", "Desc", "done");
}); store.create("Child", "Desc", "done");
expect(() => store.update("2", { parentId: "1" })).toThrow(
it("complete() is the internal proof-review completion path", () => { "parentId is creation-only",
store.create("Test", "Desc", "done"); );
const task = store.complete("1"); });
expect(task.status).toBe("completed");
}); it("returns not found for update on non-existent task", () => {
const { task, changedFields } = store.update("999", {
it("complete() also works for subtasks", () => { status: "in_progress",
createSubtask(store, "Test"); });
const task = store.complete("2"); expect(task).toBeUndefined();
expect(task.status).toBe("completed"); expect(changedFields).toEqual([]);
}); });
it("complete() throws on non-existent task", () => { it("complete() is the internal proof-review completion path", () => {
expect(() => store.complete("999")).toThrow("not found"); store.create("Test", "Desc", "done");
}); const task = store.complete("1");
expect(task.status).toBe("completed");
it("delete method works", () => { });
store.create("Test", "Desc", "done");
expect(store.delete("1")).toBe(true); it("complete() also works for subtasks", () => {
expect(store.delete("1")).toBe(false); // already deleted createSubtask(store, "Test");
expect(store.list()).toHaveLength(0); const task = store.complete("2");
}); expect(task.status).toBe("completed");
});
it("creates tasks with metadata via TaskCreate", () => {
const t = store.create("With meta", "Desc", "done", undefined, { pr: "123", reviewer: "alice" }); it("complete() throws on non-existent task", () => {
expect(t.metadata).toEqual({ pr: "123", reviewer: "alice" }); expect(() => store.complete("999")).toThrow("not found");
});
const retrieved = store.get("1")!;
expect(retrieved.metadata).toEqual({ pr: "123", reviewer: "alice" }); it("delete method works", () => {
}); store.create("Test", "Desc", "done");
expect(store.delete("1")).toBe(true);
it("allows circular dependencies with warning", () => { expect(store.delete("1")).toBe(false); // already deleted
store.create("A", "Desc", "done"); expect(store.list()).toHaveLength(0);
store.create("B", "Desc", "done"); });
store.update("1", { add_blocks: ["2"] });
const { warnings } = store.update("2", { add_blocks: ["1"] }); it("creates tasks with metadata via TaskCreate", () => {
const t = store.create("With meta", "Desc", "done", undefined, {
expect(store.get("1")!.blocks).toContain("2"); pr: "123",
expect(store.get("2")!.blocks).toContain("1"); reviewer: "alice",
expect(warnings).toContain("cycle: #2 and #1 block each other"); });
}); expect(t.metadata).toEqual({ pr: "123", reviewer: "alice" });
it("allows self-dependency with warning", () => { const retrieved = store.get("1")!;
store.create("Self", "Desc", "done"); expect(retrieved.metadata).toEqual({ pr: "123", reviewer: "alice" });
const { warnings } = store.update("1", { add_blocks: ["1"] }); });
expect(store.get("1")!.blocks).toContain("1");
expect(warnings).toContain("#1 blocks itself"); it("allows circular dependencies with warning", () => {
}); store.create("A", "Desc", "done");
store.create("B", "Desc", "done");
it("stores dangling edge IDs with warning", () => { store.update("1", { add_blocks: ["2"] });
store.create("Real", "Desc", "done"); const { warnings } = store.update("2", { add_blocks: ["1"] });
const { warnings } = store.update("1", { add_blocks: ["9999"] });
expect(store.get("1")!.blocks).toContain("9999"); expect(store.get("1")!.blocks).toContain("2");
expect(warnings).toContain("#9999 does not exist"); expect(store.get("2")!.blocks).toContain("1");
}); expect(warnings).toContain("cycle: #2 and #1 block each other");
});
it("returns no warnings for valid dependencies", () => {
store.create("A", "Desc", "done"); it("allows self-dependency with warning", () => {
store.create("B", "Desc", "done"); store.create("Self", "Desc", "done");
const { warnings } = store.update("1", { add_blocks: ["2"] }); const { warnings } = store.update("1", { add_blocks: ["1"] });
expect(warnings).toEqual([]); expect(store.get("1")!.blocks).toContain("1");
}); expect(warnings).toContain("#1 blocks itself");
});
it("accepts whitespace-only subjects (matches Claude Code)", () => {
const t = store.create(" ", "Desc", "done"); it("stores dangling edge IDs with warning", () => {
expect(t.subject).toBe(" "); store.create("Real", "Desc", "done");
}); const { warnings } = store.update("1", { add_blocks: ["9999"] });
expect(store.get("1")!.blocks).toContain("9999");
it("updates progress_label field", () => { expect(warnings).toContain("#9999 does not exist");
store.create("Test", "Desc", "done"); });
const { changedFields } = store.update("1", { progress_label: "Running tests" });
expect(changedFields).toContain("progress_label"); it("returns no warnings for valid dependencies", () => {
expect(store.get("1")!.progress_label).toBe("Running tests"); store.create("A", "Desc", "done");
}); store.create("B", "Desc", "done");
const { warnings } = store.update("1", { add_blocks: ["2"] });
it("updates description field", () => { expect(warnings).toEqual([]);
store.create("Test", "Original desc", "done"); });
const { changedFields } = store.update("1", { description: "Updated desc" });
expect(changedFields).toContain("description"); it("accepts whitespace-only subjects (matches Claude Code)", () => {
expect(store.get("1")!.description).toBe("Updated desc"); const t = store.create(" ", "Desc", "done");
}); expect(t.subject).toBe(" ");
});
it("updates done_criterion field", () => {
store.create("Test", "Desc", "original criterion"); it("updates progress_label field", () => {
const { changedFields } = store.update("1", { done_criterion: "updated criterion" }); store.create("Test", "Desc", "done");
expect(changedFields).toContain("done_criterion"); const { changedFields } = store.update("1", {
expect(store.get("1")!.done_criterion).toBe("updated criterion"); progress_label: "Running tests",
}); });
expect(changedFields).toContain("progress_label");
it("returns empty changedFields when updating non-existent task", () => { expect(store.get("1")!.progress_label).toBe("Running tests");
const { task, changedFields, warnings } = store.update("999", { status: "in_progress" }); });
expect(task).toBeUndefined();
expect(changedFields).toEqual([]); it("updates description field", () => {
expect(warnings).toEqual([]); store.create("Test", "Original desc", "done");
}); const { changedFields } = store.update("1", {
description: "Updated desc",
it("clearCompleted cleans up dependency edges", () => { });
store.create("Blocker", "Desc", "done"); expect(changedFields).toContain("description");
store.create("Blocked", "Desc", "done"); expect(store.get("1")!.description).toBe("Updated desc");
store.update("1", { add_blocks: ["2"] }); });
// complete() is the internal proof-review completion path.
store.complete("1"); it("updates done_criterion field", () => {
store.create("Test", "Desc", "original criterion");
store.clearCompleted(); const { changedFields } = store.update("1", {
done_criterion: "updated criterion",
const t2 = store.get("2")!; });
expect(t2.blockedBy).toEqual([]); expect(changedFields).toContain("done_criterion");
}); expect(store.get("1")!.done_criterion).toBe("updated criterion");
});
it("handles multiple add_blocks in one call", () => {
store.create("Blocker", "Desc", "done"); it("returns empty changedFields when updating non-existent task", () => {
store.create("B1", "Desc", "done"); const { task, changedFields, warnings } = store.update("999", {
store.create("B2", "Desc", "done"); status: "in_progress",
});
store.update("1", { add_blocks: ["2", "3"] }); expect(task).toBeUndefined();
expect(changedFields).toEqual([]);
expect(store.get("1")!.blocks).toEqual(["2", "3"]); expect(warnings).toEqual([]);
expect(store.get("2")!.blockedBy).toContain("1"); });
expect(store.get("3")!.blockedBy).toContain("1");
}); it("clearCompleted cleans up dependency edges", () => {
store.create("Blocker", "Desc", "done");
it("add_blocked_by warns on self-dependency", () => { store.create("Blocked", "Desc", "done");
store.create("Self", "Desc", "done"); store.update("1", { add_blocks: ["2"] });
const { warnings } = store.update("1", { add_blocked_by: ["1"] }); // complete() is the internal proof-review completion path.
expect(store.get("1")!.blockedBy).toContain("1"); store.complete("1");
expect(warnings).toContain("#1 blocks itself");
}); store.clearCompleted();
it("add_blocked_by warns on dangling ref", () => { const t2 = store.get("2")!;
store.create("Real", "Desc", "done"); expect(t2.blockedBy).toEqual([]);
const { warnings } = store.update("1", { add_blocked_by: ["9999"] }); });
expect(store.get("1")!.blockedBy).toContain("9999");
expect(warnings).toContain("#9999 does not exist"); it("handles multiple add_blocks in one call", () => {
}); store.create("Blocker", "Desc", "done");
store.create("B1", "Desc", "done");
it("add_blocked_by warns on cycle", () => { store.create("B2", "Desc", "done");
store.create("A", "Desc", "done");
store.create("B", "Desc", "done"); store.update("1", { add_blocks: ["2", "3"] });
store.update("1", { add_blocks: ["2"] });
const { warnings } = store.update("1", { add_blocked_by: ["2"] }); expect(store.get("1")!.blocks).toEqual(["2", "3"]);
expect(warnings).toContain("cycle: #1 and #2 block each other"); expect(store.get("2")!.blockedBy).toContain("1");
}); expect(store.get("3")!.blockedBy).toContain("1");
});
it("clearCompleted returns 0 when no completed tasks", () => {
store.create("Pending", "Desc", "done"); it("add_blocked_by warns on self-dependency", () => {
expect(store.clearCompleted()).toBe(0); store.create("Self", "Desc", "done");
}); const { warnings } = store.update("1", { add_blocked_by: ["1"] });
expect(store.get("1")!.blockedBy).toContain("1");
it("list sorts pending → in_progress → completed with all three present", () => { expect(warnings).toContain("#1 blocks itself");
store.create("Pending task", "Desc", "done"); });
store.create("Completed task", "Desc", "done");
store.create("In-progress task", "Desc", "done"); it("add_blocked_by warns on dangling ref", () => {
store.create("Another pending", "Desc", "done"); store.create("Real", "Desc", "done");
const { warnings } = store.update("1", { add_blocked_by: ["9999"] });
store.complete("2"); expect(store.get("1")!.blockedBy).toContain("9999");
store.update("3", { status: "in_progress" }); expect(warnings).toContain("#9999 does not exist");
});
const tasks = store.list();
const statusOrder: Record<string, number> = { pending: 0, in_progress: 1, completed: 2 }; it("add_blocked_by warns on cycle", () => {
const sorted = [...tasks].sort((a, b) => { store.create("A", "Desc", "done");
const so = (statusOrder[a.status] ?? 0) - (statusOrder[b.status] ?? 0); store.create("B", "Desc", "done");
if (so !== 0) return so; store.update("1", { add_blocks: ["2"] });
return Number(a.id) - Number(b.id); const { warnings } = store.update("1", { add_blocked_by: ["2"] });
}); expect(warnings).toContain("cycle: #1 and #2 block each other");
});
expect(sorted.map(t => t.id)).toEqual(["1", "4", "3", "2"]);
expect(sorted.map(t => t.status)).toEqual(["pending", "pending", "in_progress", "completed"]); it("clearCompleted returns 0 when no completed tasks", () => {
}); store.create("Pending", "Desc", "done");
expect(store.clearCompleted()).toBe(0);
});
it("list sorts pending → in_progress → completed with all three present", () => {
store.create("Pending task", "Desc", "done");
store.create("Completed task", "Desc", "done");
store.create("In-progress task", "Desc", "done");
store.create("Another pending", "Desc", "done");
store.complete("2");
store.update("3", { status: "in_progress" });
const tasks = store.list();
const statusOrder: Record<string, number> = {
pending: 0,
in_progress: 1,
completed: 2,
};
const sorted = [...tasks].sort((a, b) => {
const so = (statusOrder[a.status] ?? 0) - (statusOrder[b.status] ?? 0);
if (so !== 0) return so;
return Number(a.id) - Number(b.id);
});
expect(sorted.map((t) => t.id)).toEqual(["1", "4", "3", "2"]);
expect(sorted.map((t) => t.status)).toEqual([
"pending",
"pending",
"in_progress",
"completed",
]);
});
}); });
describe("TaskStore (file-backed)", () => { describe("TaskStore (file-backed)", () => {
const testListId = `test-${Date.now()}-${Math.random().toString(36).slice(2)}`; const testListId = `test-${Date.now()}-${Math.random().toString(36).slice(2)}`;
const tasksDir = join(homedir(), ".pi", "tasks"); const tasksDir = join(process.cwd(), ".pi", "tasks");
const filePath = join(tasksDir, `${testListId}.json`); const filePath = join(tasksDir, `${testListId}.json`);
afterEach(() => { afterEach(() => {
try { rmSync(filePath); } catch { /* */ } try {
try { rmSync(filePath + ".lock"); } catch { /* */ } rmSync(filePath);
try { rmSync(filePath + ".tmp"); } catch { /* */ } } catch {
}); /* */
}
try {
rmSync(filePath + ".lock");
} catch {
/* */
}
try {
rmSync(filePath + ".tmp");
} catch {
/* */
}
});
it("persists tasks to disk", () => { it("persists tasks to disk", () => {
const store1 = new TaskStore(testListId); const store1 = new TaskStore(testListId);
store1.create("Persistent task", "Should survive reload", "done"); store1.create("Persistent task", "Should survive reload", "done");
const store2 = new TaskStore(testListId); const store2 = new TaskStore(testListId);
const tasks = store2.list(); const tasks = store2.list();
expect(tasks).toHaveLength(1); expect(tasks).toHaveLength(1);
expect(tasks[0].subject).toBe("Persistent task"); expect(tasks[0].subject).toBe("Persistent task");
}); });
it("persists in_progress updates to disk", () => { it("persists in_progress updates to disk", () => {
const store1 = new TaskStore(testListId); const store1 = new TaskStore(testListId);
store1.create("Task", "Desc", "done"); store1.create("Task", "Desc", "done");
store1.update("1", { status: "in_progress" }); store1.update("1", { status: "in_progress" });
const store2 = new TaskStore(testListId); const store2 = new TaskStore(testListId);
expect(store2.get("1")!.status).toBe("in_progress"); expect(store2.get("1")!.status).toBe("in_progress");
}); });
it("persists completed tasks to disk", () => { it("persists completed tasks to disk", () => {
const store1 = new TaskStore(testListId); const store1 = new TaskStore(testListId);
store1.create("Done task", "Desc", "done"); store1.create("Done task", "Desc", "done");
store1.create("Pending task", "Desc", "done"); store1.create("Pending task", "Desc", "done");
store1.complete("1"); store1.complete("1");
const store2 = new TaskStore(testListId); const store2 = new TaskStore(testListId);
expect(store2.get("1")).toBeDefined(); expect(store2.get("1")).toBeDefined();
expect(store2.get("1")!.status).toBe("completed"); expect(store2.get("1")!.status).toBe("completed");
expect(store2.get("2")).toBeDefined(); expect(store2.get("2")).toBeDefined();
expect(store2.list()).toHaveLength(2); expect(store2.list()).toHaveLength(2);
}); });
it("restores all tasks across instances", () => { it("restores all tasks across instances", () => {
const store1 = new TaskStore(testListId); const store1 = new TaskStore(testListId);
store1.create("Pending", "Desc", "done"); store1.create("Pending", "Desc", "done");
store1.create("In progress", "Desc", "done"); store1.create("In progress", "Desc", "done");
store1.create("Done", "Desc", "done"); store1.create("Done", "Desc", "done");
store1.update("2", { status: "in_progress" }); store1.update("2", { status: "in_progress" });
store1.complete("3"); store1.complete("3");
const store2 = new TaskStore(testListId); const store2 = new TaskStore(testListId);
const tasks = store2.list(); const tasks = store2.list();
expect(tasks).toHaveLength(3); expect(tasks).toHaveLength(3);
expect(tasks.map(t => t.id)).toContain("1"); expect(tasks.map((t) => t.id)).toContain("1");
expect(tasks.map(t => t.id)).toContain("2"); expect(tasks.map((t) => t.id)).toContain("2");
expect(tasks.map(t => t.id)).toContain("3"); expect(tasks.map((t) => t.id)).toContain("3");
}); });
it("persists ID counter across instances", () => { it("persists ID counter across instances", () => {
const store1 = new TaskStore(testListId); const store1 = new TaskStore(testListId);
store1.create("Task 1", "Desc", "done"); store1.create("Task 1", "Desc", "done");
store1.create("Task 2", "Desc", "done"); store1.create("Task 2", "Desc", "done");
const store2 = new TaskStore(testListId); const store2 = new TaskStore(testListId);
const t3 = store2.create("Task 3", "Desc", "done"); const t3 = store2.create("Task 3", "Desc", "done");
expect(t3.id).toBe("3"); expect(t3.id).toBe("3");
}); });
}); });
describe("TaskStore (absolute path)", () => { describe("TaskStore (absolute path)", () => {
const absFilePath = join(tmpdir(), `pi-tasks-test-${Date.now()}.json`); const absFilePath = join(tmpdir(), `pi-tasks-test-${Date.now()}.json`);
afterEach(() => { afterEach(() => {
try { rmSync(absFilePath); } catch { /* */ } try {
try { rmSync(absFilePath + ".lock"); } catch { /* */ } rmSync(absFilePath);
try { rmSync(absFilePath + ".tmp"); } catch { /* */ } } catch {
}); /* */
}
try {
rmSync(absFilePath + ".lock");
} catch {
/* */
}
try {
rmSync(absFilePath + ".tmp");
} catch {
/* */
}
});
it("accepts absolute path and persists tasks", () => { it("accepts absolute path and persists tasks", () => {
const store1 = new TaskStore(absFilePath); const store1 = new TaskStore(absFilePath);
store1.create("Abs path task", "Desc", "done"); store1.create("Abs path task", "Desc", "done");
const store2 = new TaskStore(absFilePath); const store2 = new TaskStore(absFilePath);
expect(store2.list()).toHaveLength(1); expect(store2.list()).toHaveLength(1);
expect(store2.list()[0].subject).toBe("Abs path task"); expect(store2.list()[0].subject).toBe("Abs path task");
}); });
it("persists completed tasks when using absolute path", () => { it("persists completed tasks when using absolute path", () => {
const store1 = new TaskStore(absFilePath); const store1 = new TaskStore(absFilePath);
store1.create("Pending", "Desc", "done"); store1.create("Pending", "Desc", "done");
store1.create("Completed", "Desc", "done"); store1.create("Completed", "Desc", "done");
store1.complete("2"); store1.complete("2");
const raw = JSON.parse(readFileSync(absFilePath, "utf-8")); const raw = JSON.parse(readFileSync(absFilePath, "utf-8"));
expect(raw.tasks).toHaveLength(2); expect(raw.tasks).toHaveLength(2);
}); });
}); });
+336 -333
View File
@@ -4,425 +4,428 @@ import { TaskWidget, type Theme, type UICtx } from "../src/ui/task-widget.js";
/** Create a mock theme that returns raw text (no ANSI escapes). */ /** Create a mock theme that returns raw text (no ANSI escapes). */
function mockTheme(): Theme { function mockTheme(): Theme {
return { return {
fg: (_color: string, text: string) => text, fg: (_color: string, text: string) => text,
bold: (text: string) => text, bold: (text: string) => text,
strikethrough: (text: string) => `~~${text}~~`, strikethrough: (text: string) => `~~${text}~~`,
}; };
} }
/** Create a mock UICtx that captures setWidget calls. */ /** Create a mock UICtx that captures setWidget calls. */
function mockUICtx() { function mockUICtx() {
const state: { const state: {
widgets: Map<string, any>; widgets: Map<string, any>;
statuses: Map<string, string | undefined>; statuses: Map<string, string | undefined>;
} = { } = {
widgets: new Map(), widgets: new Map(),
statuses: new Map(), statuses: new Map(),
}; };
const ctx: UICtx = { const ctx: UICtx = {
setWidget(key, content, options) { setWidget(key, content, options) {
state.widgets.set(key, { content, options }); state.widgets.set(key, { content, options });
}, },
setStatus(key, text) { setStatus(key, text) {
state.statuses.set(key, text); state.statuses.set(key, text);
}, },
}; };
return { ctx, state }; return { ctx, state };
} }
/** Render the widget and return its lines. */ /** Render the widget and return its lines. */
function renderWidget(state: ReturnType<typeof mockUICtx>["state"]): string[] { function renderWidget(state: ReturnType<typeof mockUICtx>["state"]): string[] {
const entry = state.widgets.get("tasks"); const entry = state.widgets.get("tasks");
if (!entry?.content) return []; if (!entry?.content) return [];
const theme = mockTheme(); const theme = mockTheme();
const tui = { terminal: { columns: 200 }, requestRender() {} }; const tui = { terminal: { columns: 200 }, requestRender() {} };
const result = entry.content(tui, theme); const result = entry.content(tui, theme);
return result.render(); return result.render();
} }
describe("TaskWidget", () => { describe("TaskWidget", () => {
let store: TaskStore; let store: TaskStore;
let widget: TaskWidget; let widget: TaskWidget;
let ui: ReturnType<typeof mockUICtx>; let ui: ReturnType<typeof mockUICtx>;
beforeEach(() => { beforeEach(() => {
vi.useFakeTimers(); vi.useFakeTimers();
store = new TaskStore(); store = new TaskStore();
widget = new TaskWidget(store); widget = new TaskWidget(store);
ui = mockUICtx(); ui = mockUICtx();
widget.setUICtx(ui.ctx); widget.setUICtx(ui.ctx);
}); });
afterEach(() => { afterEach(() => {
widget.dispose(); widget.dispose();
vi.useRealTimers(); vi.useRealTimers();
}); });
it("shows nothing when no tasks exist", () => { it("shows nothing when no tasks exist", () => {
widget.update(); widget.update();
const entry = ui.state.widgets.get("tasks"); const entry = ui.state.widgets.get("tasks");
expect(entry?.content).toBeUndefined(); expect(entry?.content).toBeUndefined();
}); });
it("renders pending tasks with ◻ icon", () => { it("renders pending tasks with ◻ icon", () => {
store.create("Do something", "Desc", "done"); store.create("Do something", "Desc", "done");
widget.update(); widget.update();
const lines = renderWidget(ui.state); const lines = renderWidget(ui.state);
expect(lines).toHaveLength(2); // header + 1 task expect(lines).toHaveLength(2); // header + 1 task
expect(lines[0]).toContain("1 tasks"); expect(lines[0]).toContain("1 goals");
expect(lines[0]).toContain("1 open"); expect(lines[0]).toContain("1 open");
expect(lines[1]).toContain("◻"); expect(lines[1]).toContain("◻");
expect(lines[1]).toContain("Do something"); expect(lines[1]).toContain("Do something");
expect(lines[1]).not.toContain("done"); expect(lines[1]).not.toContain("done");
}); });
it("renders in-progress tasks with ◼ icon", () => { it("renders in-progress tasks with ◼ icon", () => {
store.create("Working on it", "Desc", "done"); store.create("Working on it", "Desc", "done");
store.update("1", { status: "in_progress" }); store.update("1", { status: "in_progress" });
widget.update(); widget.update();
const lines = renderWidget(ui.state); const lines = renderWidget(ui.state);
expect(lines[1]).toContain("◼"); expect(lines[1]).toContain("◼");
expect(lines[1]).toContain("Working on it"); expect(lines[1]).toContain("Working on it");
}); });
it("renders completed tasks with ✔ icon and strikethrough", () => { it("hides the widget when only completed tasks remain", () => {
store.create("Done task", "Desc", "done"); store.create("Done task", "Desc", "done");
store.complete("1"); store.complete("1");
widget.update(); widget.update();
const lines = renderWidget(ui.state); const lines = renderWidget(ui.state);
expect(lines[1]).toContain("✔"); expect(lines).toEqual([]);
expect(lines[1]).toContain("~~#1 Done task~~"); });
});
it("does not render proof badges on collapsed rows", () => { it("does not render proof badges on collapsed rows", () => {
store.create("Done task", "Desc", "done"); store.create("Open task", "Desc", "done");
store.update("1", { store.create("Done task", "Desc", "done");
metadata: { robot_review_observations: ["Observed output drift on seed 2"], lgtm_evidence: "verbatim output" }, store.update("2", {
}); metadata: {
store.complete("1"); robot_review_observations: ["Observed output drift on seed 2"],
widget.update(); lgtm_evidence: "verbatim output",
},
});
store.complete("2");
widget.update();
const lines = renderWidget(ui.state); const lines = renderWidget(ui.state);
expect(lines[1]).not.toContain("["); expect(lines[1]).toContain("Open task");
expect(lines[1]).not.toContain("🛠"); expect(lines[1]).not.toContain("[");
expect(lines[1]).not.toContain("🤖"); expect(lines[1]).not.toContain("robot_review_observations");
}); expect(lines[1]).not.toContain("lgtm_evidence");
});
it("renders active tasks with spinner icon", () => { it("renders active tasks with spinner icon", () => {
store.create("Running thing", "Desc", "done criterion", "Processing data"); store.create("Running thing", "Desc", "done criterion", "Processing data");
store.update("1", { status: "in_progress" }); store.update("1", { status: "in_progress" });
widget.setActiveTask("1", true); widget.setActiveTask("1", true);
const lines = renderWidget(ui.state); const lines = renderWidget(ui.state);
// Should show activeForm text with "…" suffix // Should show activeForm text with "…" suffix
expect(lines[1]).toContain("Processing data…"); expect(lines[1]).toContain("Processing data…");
// Should NOT show ◼ for active task // Should NOT show ◼ for active task
expect(lines[1]).not.toContain("◼"); expect(lines[1]).not.toContain("◼");
}); });
it("shows blocked-by info for pending tasks", () => { it("shows blocked-by info for pending tasks", () => {
store.create("Blocker", "Desc", "done"); store.create("Blocker", "Desc", "done");
store.create("Blocked", "Desc", "done"); store.create("Blocked", "Desc", "done");
store.update("2", { add_blocked_by: ["1"] }); store.update("2", { add_blocked_by: ["1"] });
widget.update(); widget.update();
const lines = renderWidget(ui.state); const lines = renderWidget(ui.state);
const blockedLine = lines.find(l => l.includes("Blocked")); const blockedLine = lines.find((l) => l.includes("Blocked"));
// blocked-by suffix is only added via dim theme helper, which in mock is identity // blocked-by suffix is only added via dim theme helper, which in mock is identity
// So we should see the raw text. Check for the relevant subject line having blocked-by info // So we should see the raw text. Check for the relevant subject line having blocked-by info
expect(blockedLine).toContain("blocked by #1"); expect(blockedLine).toContain("blocked by #1");
}); });
it("hides completed blockers in blocked-by suffix", () => { it("hides completed blockers in blocked-by suffix", () => {
store.create("Blocker", "Desc", "done"); store.create("Blocker", "Desc", "done");
store.create("Blocked", "Desc", "done"); store.create("Blocked", "Desc", "done");
store.update("2", { add_blocked_by: ["1"] }); store.update("2", { add_blocked_by: ["1"] });
store.complete("1"); store.complete("1");
widget.update(); widget.update();
const lines = renderWidget(ui.state); const lines = renderWidget(ui.state);
const blockedLine = lines.find(l => l.includes("Blocked")); const blockedLine = lines.find((l) => l.includes("Blocked"));
expect(blockedLine).not.toContain("blocked by"); expect(blockedLine).not.toContain("blocked by");
}); });
it("shows status summary in header", () => { it("shows status summary in header", () => {
store.create("Task A", "Desc", "done"); store.create("Task A", "Desc", "done");
store.create("Task B", "Desc", "done"); store.create("Task B", "Desc", "done");
store.create("Task C", "Desc", "done"); store.create("Task C", "Desc", "done");
store.complete("1"); store.complete("1");
store.update("2", { status: "in_progress" }); store.update("2", { status: "in_progress" });
widget.update(); widget.update();
const lines = renderWidget(ui.state); const lines = renderWidget(ui.state);
expect(lines[0]).toContain("3 tasks"); expect(lines[0]).toContain("3 goals");
expect(lines[0]).toContain("1 done"); expect(lines[0]).toContain("1 done hidden");
expect(lines[0]).toContain("1 in progress"); expect(lines[0]).toContain("1 in progress");
expect(lines[0]).toContain("1 open"); expect(lines[0]).toContain("1 open");
}); });
it("clears widget when all tasks are deleted", () => { it("clears widget when all tasks are deleted", () => {
store.create("Task", "Desc", "done"); store.create("Task", "Desc", "done");
widget.update(); widget.update();
expect(ui.state.widgets.get("tasks")?.content).toBeDefined(); expect(ui.state.widgets.get("tasks")?.content).toBeDefined();
store.update("1", { status: "deleted" }); store.update("1", { status: "deleted" });
widget.update(); widget.update();
expect(ui.state.widgets.get("tasks")?.content).toBeUndefined(); expect(ui.state.widgets.get("tasks")?.content).toBeUndefined();
}); });
it("limits visible tasks to MAX_VISIBLE_TASKS", () => {
  const totalTasks = 15;
  for (let taskNumber = 1; taskNumber <= totalTasks; taskNumber++) {
    store.create(`Task ${taskNumber}`, "Desc", "done");
  }
  widget.update();

  const rendered = renderWidget(ui.state);
  // Expected layout: 1 header + 5 visible tasks + 1 overflow line
  // ("...and 10 more open") = 7 lines.
  expect(rendered).toHaveLength(7);
  expect(rendered[6]).toContain("10 more open");
});
it("tracks token usage for active tasks", () => {
  store.create("Active task", "Desc", "done criterion", "Running");
  store.update("1", { status: "in_progress" });
  widget.setActiveTask("1", true);

  // Usage is reported twice; the rendered line is expected to show the
  // accumulated totals (1000 + 500 -> "1.5k", 500 + 300 -> "800").
  widget.addTokenUsage(1000, 500);
  widget.addTokenUsage(500, 300);

  const runningLine = renderWidget(ui.state).find((line) =>
    line.includes("Running…"),
  );
  expect(runningLine).toContain("↑ 1.5k");
  expect(runningLine).toContain("↓ 800");
});
it("deactivates a task with setActiveTask(id, false)", () => {
  // Re-renders the widget and returns the line at index 1 (the task line).
  const taskLine = () => renderWidget(ui.state)[1];

  store.create("Task", "Desc", "done criterion", "Doing work");
  store.update("1", { status: "in_progress" });

  // While active, the line shows the activeForm text followed by an ellipsis.
  widget.setActiveTask("1", true);
  expect(taskLine()).toContain("Doing work…");

  // After deactivation it renders as a regular in_progress task (◼).
  widget.setActiveTask("1", false);
  const afterDeactivation = taskLine();
  expect(afterDeactivation).toContain("◼");
  expect(afterDeactivation).not.toContain("Doing work…");
});
it("prunes stale active IDs on update", () => {
  store.create("Task", "Desc", "done");
  store.update("1", { status: "in_progress" });
  widget.setActiveTask("1", true);

  // The task is completed directly through the store while still flagged active.
  store.complete("1");
  widget.update();

  // Completed tasks are hidden from the default widget, so with the only
  // task completed nothing is rendered.
  expect(renderWidget(ui.state)).toEqual([]);
});
it("supports multiple active tasks simultaneously", () => {
  store.create("Task A", "Desc", "done criterion", "Processing A");
  store.create("Task B", "Desc", "done criterion", "Processing B");

  // Put both tasks in progress, then flag both as active.
  const taskIds = ["1", "2"];
  for (const id of taskIds) {
    store.update(id, { status: "in_progress" });
  }
  for (const id of taskIds) {
    widget.setActiveTask(id, true);
  }

  // Skip the header; each task line shows its own activeForm text.
  const [, firstTaskLine, secondTaskLine] = renderWidget(ui.state);
  expect(firstTaskLine).toContain("Processing A…");
  expect(secondTaskLine).toContain("Processing B…");
});
it("distributes token usage across all active tasks", () => {
  store.create("Task A", "Desc", "done criterion", "A");
  store.create("Task B", "Desc", "done criterion", "B");
  store.update("1", { status: "in_progress" });
  store.update("2", { status: "in_progress" });
  widget.setActiveTask("1", true);
  widget.setActiveTask("2", true);

  // A single usage report made while both tasks are active.
  widget.addTokenUsage(100, 50);

  const [, lineA, lineB] = renderWidget(ui.state);
  // Both tasks should have the same token counts. Assert BOTH counters on
  // each line: checking only "↑" would let a regression in the "↓" counter
  // pass unnoticed.
  for (const line of [lineA, lineB]) {
    expect(line).toContain("↑ 100");
    expect(line).toContain("↓ 50");
  }
});
it("dispose clears widget and timer", () => {
  // An active in_progress task is set up before disposing.
  store.create("Task", "Desc", "done");
  store.update("1", { status: "in_progress" });
  widget.setActiveTask("1", true);

  widget.dispose();

  // After dispose, no content remains under the "tasks" widget key.
  const tasksEntry = ui.state.widgets.get("tasks");
  expect(tasksEntry?.content).toBeUndefined();
});
it("uses subject as fallback when no activeForm", () => {
  // No fourth argument is passed, so the task has no activeForm text.
  store.create("My Subject", "Desc", "done");
  store.update("1", { status: "in_progress" });
  widget.setActiveTask("1", true);

  // The active line falls back to the subject, followed by an ellipsis.
  const [, taskLine] = renderWidget(ui.state);
  expect(taskLine).toContain("My Subject…");
});
it("shows elapsed time but no token arrows when tokens are zero", () => {
  store.create("No tokens", "Desc", "done criterion", "Working");
  store.update("1", { status: "in_progress" });
  widget.setActiveTask("1", true);

  // addTokenUsage is deliberately never called, so token counts stay at 0.
  vi.advanceTimersByTime(5000);
  widget.update();

  const workingLine = renderWidget(ui.state).find((line) =>
    line.includes("Working…"),
  );
  expect(workingLine).toContain("5s");
  // Neither token arrow may appear when there is nothing to report.
  for (const arrow of ["↑", "↓"]) {
    expect(workingLine).not.toContain(arrow);
  }
});
it("cleans up metrics when stale active IDs are pruned", () => {
  store.create("Task", "Desc", "done criterion", "Running");
  store.update("1", { status: "in_progress" });
  widget.setActiveTask("1", true);
  widget.addTokenUsage(100, 50);

  // Delete the active task through the store, then let the widget refresh.
  store.update("1", { status: "deleted" });
  widget.update();

  // Activate a second task. Note it receives a NEW id ("2"), not the deleted
  // task's id, and no usage is reported for it.
  store.create("Task 2", "Desc", "done criterion", "Running"); // ID 2
  store.update("2", { status: "in_progress" });
  widget.setActiveTask("2", true);

  const lines = renderWidget(ui.state);
  // Confirm the inspected line really is the active task's line, so the
  // negative assertion below cannot pass on a missing or unrelated line.
  expect(lines[1]).toContain("Running…");
  // Should not carry over the deleted task's tokens.
  expect(lines[1]).not.toContain("↑ 100");
});
it("indents task lines under header", () => {
  store.create("Indented task", "Desc", "done");
  widget.update();

  // The line after the header must begin with two whitespace characters.
  const [, firstTaskLine] = renderWidget(ui.state);
  expect(firstTaskLine).toMatch(/^\s{2}/);
});
it("widget is placed aboveEditor", () => {
  store.create("Task", "Desc", "done");
  widget.update();

  // The placement option recorded for the "tasks" widget entry.
  const placement = ui.state.widgets.get("tasks")?.options?.placement;
  expect(placement).toBe("aboveEditor");
});
}); });
describe("formatDuration (via widget rendering)", () => {
  // Millisecond units used to spell out the simulated elapsed times.
  const SECOND = 1000;
  const MINUTE = 60 * SECOND;
  const HOUR = 60 * MINUTE;

  let store: TaskStore;
  let widget: TaskWidget;
  let ui: ReturnType<typeof mockUICtx>;

  /** Creates task #1 with activeForm "Working", then marks it in_progress and active. */
  const startActiveTask = (subject: string): void => {
    store.create(subject, "Desc", "done criterion", "Working");
    store.update("1", { status: "in_progress" });
    widget.setActiveTask("1", true);
  };

  /** Refreshes the widget and returns the rendered line at index 1 (the task line). */
  const refreshedTaskLine = () => {
    widget.update();
    return renderWidget(ui.state)[1];
  };

  beforeEach(() => {
    // Fake timers let each test advance the clock deterministically.
    vi.useFakeTimers();
    store = new TaskStore();
    widget = new TaskWidget(store);
    ui = mockUICtx();
    widget.setUICtx(ui.ctx);
  });

  afterEach(() => {
    widget.dispose();
    vi.useRealTimers();
  });

  it("shows seconds for short durations", () => {
    startActiveTask("Quick");
    vi.advanceTimersByTime(30 * SECOND);
    expect(refreshedTaskLine()).toContain("30s");
  });

  it("shows hours for long durations", () => {
    startActiveTask("Long");
    // 1h 2m 3s elapsed; the expected text is "1h 2m" (no seconds).
    vi.advanceTimersByTime(1 * HOUR + 2 * MINUTE + 3 * SECOND);
    expect(refreshedTaskLine()).toContain("1h 2m");
  });

  it("shows exact hours without minutes", () => {
    startActiveTask("Exact");
    vi.advanceTimersByTime(2 * HOUR);
    // The closing parenthesis right after "2h" shows no minutes part follows.
    expect(refreshedTaskLine()).toContain("2h)");
  });

  it("shows minutes and seconds", () => {
    startActiveTask("Medium");
    vi.advanceTimersByTime(2 * MINUTE + 49 * SECOND);
    expect(refreshedTaskLine()).toContain("2m 49s");
  });

  it("formats small token counts without k suffix", () => {
    startActiveTask("Small");
    widget.addTokenUsage(500, 200);
    const taskLine = refreshedTaskLine();
    expect(taskLine).toContain("↑ 500");
    expect(taskLine).toContain("↓ 200");
  });

  it("formats token counts with k suffix and removes .0", () => {
    startActiveTask("Large");
    widget.addTokenUsage(2000, 4100);
    const taskLine = refreshedTaskLine();
    expect(taskLine).toContain("↑ 2k"); // 2000 is shown as "2k", not "2.0k"
    expect(taskLine).toContain("↓ 4.1k"); // 4100 is shown as "4.1k"
  });
});