diff --git a/README.md b/README.md index e66ed99..6ddff79 100644 --- a/README.md +++ b/README.md @@ -39,7 +39,7 @@ pi -e npm:@wassname2/pi-plan agent calls `CompleteGoal`, which runs `verify` and a read-only judge and, on accept, marks it done and logs it. -Other commands: `/plan` (print the plan), `/plan clear` (empty `plan.md`, history kept in git), +Other commands: `/plan` (print the goals), `/plan clear` (empty `plan.md`, history kept in git), `/plan judge ` (use a specific model for the sign-off judge; default is your current model). @@ -50,41 +50,48 @@ One file holds the objective, the goals, and a short append-only log. ```markdown # Plan: ship the cache layer -## Goal: Implement cache layer +## Goal: [/] Implement cache layer -status: active -done_when: p95 < 50ms on bench-X. If wrong: timeouts in load-test.log +done_when: p95 < 50ms on bench-X verify: pytest tests/cache -q && python bench/p95.py --max-ms 50 +- [x] wire cache client +- [ ] eviction policy + failure_modes: - cache silently bypassed (hit-rate ~0, latency ok by luck) - bench too small to exercise eviction -- [x] wire cache client -- [ ] eviction policy +evidence: + - load-test.log p95=41ms; bench/p95.py exited 0 + - cache hit-rate 0.93 in load-test.log (not bypassed) ## Log - 2026-06-15 14:02 cache client wired; eviction next ``` -- A goal is a `## Goal:` header with an ``, a `status:` - (`open` | `active` | `done` | `cancelled`), one falsifiable `done_when:`, an optional `verify:` - shell command, an optional short `failure_modes:` pre-mortem list, and `- [ ]` subtasks. -- `done_when` names the evidence that distinguishes real success from a subtle failure. `verify`, - when present, is the deterministic first stage of the sign-off check. -- The agent ticks subtasks, appends to `## Log`, and sets `status` as it works. Multiple goals may - be `active`. 
+- A goal is a `## Goal:` header whose checkbox carries its state (`[ ]` open, `[/]` active, `[x]` + done, `[-]` cancelled), then an id comment, one falsifiable `done_when:`, an optional `verify:` + shell command, `- [ ]` subtasks, an optional short `failure_modes:` pre-mortem list, and an + `evidence:` list. +- `done_when` is the test, written at planning. `evidence` is the proof, a `- ` list the agent fills + at completion pointing at durable artifacts; `CompleteGoal` reads it from the file. `failure_modes` + is the pre-mortem. `verify`, when present, is the deterministic first stage of the sign-off. +- The agent ticks subtasks, appends to `## Log`, and sets the header checkbox (`[/]` when it starts + a goal) as it works. Only `CompleteGoal` writes `[x]`. Multiple goals may be active. ## The sign-off check (`CompleteGoal`) -`CompleteGoal(goal_id, evidence, paths?)` is the one blessed completion path: +`CompleteGoal(goal_id)` is the one blessed completion path. It reads the goal's `evidence:` block +from plan.md (so the proof is git-tracked and human-reviewable before sign-off, not buried in a tool +call): 1. If the goal has a `verify:` command, it is run. A non-zero exit rejects immediately, with no model call. -2. Otherwise a read-only `pi` subprocess (the judge) inspects the evidence against the repo and the - named failure modes and returns a verdict. It re-derives from the artifacts you point it at - rather than trusting the claim, so point `evidence`/`paths` at durable artifacts (saved logs, - committed diffs, files). -3. On accept, the goal's `status` flips to `done` and a `## Log` line is written. On reject, the - goal stays open and the agent is told what is missing. +2. Otherwise (verify passed, or the goal has none) a read-only `pi` subprocess (the judge) inspects the `evidence:` items against the repo + and the named failure modes and returns a verdict. 
It re-derives from the artifacts the evidence + points at rather than trusting the claim, so the `evidence:` list should name durable artifacts + (saved logs, committed diffs, files). +3. On accept, the goal's header checkbox flips to `[x]` and a `## Log` line is written. On reject, + the goal stays open and the agent is told what is missing. The judge defaults to your current model (guaranteed authorized and capable). Set a different one with `/plan judge ` for an independent cross-family check. diff --git a/src/index.ts b/src/index.ts index ec62b81..515cb32 100644 --- a/src/index.ts +++ b/src/index.ts @@ -9,7 +9,7 @@ * judgement rather than guarding it. * * Flow: - * /plan -> plan mode: agent explores, drafts goals into plan.md (planDrafting guides) + * /plan -> plan mode: agent explores, drafts goals into plan.md (planDrafting guides) * agent_end -> review menu (Ready / Edit / $EDITOR / Cancel); Ready offers compaction * execution -> each turn, inject the plan summary (survives compaction) + a reminder; * agent works goals, ticks subtasks, appends ## Log, calls CompleteGoal @@ -75,12 +75,12 @@ export default function piPlanExtension(pi: ExtensionAPI): void { const mark: Record = { done: "✔", active: "▸", open: "◻", cancelled: "✗" }; const lines = [`Plan: ${doc.objective || "(untitled)"}`]; for (const g of doc.goals) { - if (g.status === "done") continue; // hide finished goals; they stay in the file - const open = g.subtasks.filter((s) => !s.done).length; - lines.push(`${mark[g.status]} ${g.subject}${open ? ` (${open} task${open === 1 ? "" : "s"})` : ""}`); + // Show every goal with its status glyph (✔ done, ▸ active, ◻ open, ✗ cancelled) so finished + // goals read as checked off rather than vanishing. Plans are small, so this stays readable. + const total = g.subtasks.length; + const done = g.subtasks.filter((s) => s.done).length; + lines.push(`${mark[g.status]} ${g.subject}${total ? 
` (${done}/${total} tasks)` : ""}`); } - const c = counts(doc); - if (c.done) lines.push(`(${c.done} done, hidden)`); return lines; } @@ -203,7 +203,7 @@ export default function piPlanExtension(pi: ExtensionAPI): void { const planFile = planPath(ctx); const planContent = readPlan(ctx); // captured now: ctx is stale after newSession below const parentSession = ctx.sessionManager.getSessionFile(); - const startMsg = `Work the plan in ${planFile}. Pick an open goal, set it active, work its subtasks, and when its done_when is met call CompleteGoal with the evidence. Keep plan.md current as you go.`; + const startMsg = `Work the plan in ${planFile}. Pick an open goal, mark it active (set its header to [/]), work its subtasks, and when its done_when is met fill the goal's evidence: block then call CompleteGoal with the goal_id. Keep plan.md current as you go.`; exitPlanMode(ctx); if (fresh && savedCmdCtx) { @@ -234,22 +234,25 @@ export default function piPlanExtension(pi: ExtensionAPI): void { name: "CompleteGoal", label: "Complete goal", description: - "Sign off a goal once its done_when is met. Runs the goal's verify command (if any) then a " + - "read-only subagent that inspects your evidence against the repo. On accept, the goal is marked " + - "done and logged; on reject, it stays open and you get what is missing. Point evidence at durable " + - "artifacts (saved logs, committed diffs, files), not claims.", + "Sign off a goal once its done_when is met. First fill the goal's evidence: block in plan.md " + + "(a '- ' list pointing at durable artifacts: saved logs, committed diffs, files, not claims), then " + + "call this with the goal_id. Runs the goal's verify command (if any) then a read-only subagent that " + + "inspects that evidence against the repo. 
On accept, the goal is marked done and logged; on reject, " + + "it stays open and you get what is missing.", parameters: Type.Object({ goal_id: Type.String({ description: "The goal's from plan.md" }), - evidence: Type.String({ description: "What shows the done_when is met, and where to verify it" }), - paths: Type.Optional(Type.Array(Type.String(), { description: "Durable artifacts the judge should inspect" })), }), async execute(_id, params, signal, _onUpdate, ctx) { const content = readPlan(ctx); const goal = findGoal(parse(content), params.goal_id); if (!goal) return text(`No goal #${params.goal_id} in plan.md.`, true); + if (goal.evidence.length === 0) { + return text(`Goal #${goal.id} has no evidence: block. Add a "- " evidence list to the goal in plan.md (what shows done_when is met, and where to verify it), then call CompleteGoal.`, true); + } // Decide the outcome (the I/O); recordSignOff applies it to the file (the pure write). - const outcome = await decideSignOff(goal, params.evidence, params.paths ?? [], state.judgeModel, ctx.cwd, signal); + // Evidence and the artifacts to inspect both come from the goal's evidence: block (single source of truth). + const outcome = await decideSignOff(goal, goal.evidence.join("\n"), goal.evidence, state.judgeModel, ctx.cwd, signal); const res = recordSignOff(content, goal.id, stamp(), outcome); if (res.content !== content) writeFileSync(planPath(ctx), res.content); updateWidget(ctx); @@ -393,9 +396,12 @@ async function runJudge( proc.on("error", (e) => resolve(`VERDICT: reject\nmissing: judge subprocess failed: ${e.message}`)); }); - const verdictLine = output.split("\n").find((l) => /^\s*VERDICT\s*:/i.test(l)) ?? ""; + // The subprocess emits ANSI/CSI control codes in -p mode; strip them so they don't leak into `missing`. + const clean = output.replace(/\u001b\[[0-9;?]*[ -/]*[@-~]/g, ""); + + const verdictLine = clean.split("\n").find((l) => /^\s*VERDICT\s*:/i.test(l)) ?? 
""; const accept = /accept/i.test(verdictLine); - const missingMatch = output.match(/missing\s*:\s*([\s\S]*)$/i); - const missing = accept ? "" : (missingMatch?.[1].trim() || output.trim().slice(-500) || "judge gave no reason"); + const missingMatch = clean.match(/missing\s*:\s*([\s\S]*)$/i); + const missing = accept ? "" : (missingMatch?.[1].trim() || clean.trim().slice(-500) || "judge gave no reason"); return { accept, missing }; } diff --git a/src/plan-file.ts b/src/plan-file.ts index a6c8589..05b0eb7 100644 --- a/src/plan-file.ts +++ b/src/plan-file.ts @@ -3,23 +3,29 @@ * * Pure module, no pi deps, so it unit-tests without a runtime. The file is the canonical store and * the agent edits it with its normal Edit tool (create goals, tick subtasks, append log), guided by - * the format in prompts.tsx and the reminder -- the form guides, it does not gate (spec D3). So this + * the format in prompts.ts and the reminder -- the form guides, it does not gate (spec D3). So this * module does NOT render or create goals; the format's single source of truth is the planDrafting * prompt. The only programmatic writers are setGoalStatus + appendLog, used by CompleteGoal to * record an accepted sign-off; both touch one line so the git diff stays readable. * - * Format (spec §4): + * A goal's state lives in a checkbox on its header (single source of truth, renders natively): + * [ ] open [/] active (in progress) [x] done [-] cancelled + * Only CompleteGoal writes [x]; the agent sets [/] when it starts a goal. + * + * Format: * * # Plan: * - * ## Goal: + * ## Goal: [ ] * - * status: open | active | done | cancelled * done_when: * verify: + * - [ ] + * * failure_modes: * - - * - [ ] + * evidence: + * - * * ## Log * - @@ -38,7 +44,10 @@ export interface Goal { status: GoalStatus; done_when: string; verify?: string; + /** Pre-mortem: ways a "done" could be wrong. Written at planning. */ failure_modes: string[]; + /** Proof the done_when is met, pointing at durable artifacts. 
Written at completion; read by CompleteGoal. */ + evidence: string[]; subtasks: Subtask[]; } @@ -49,12 +58,17 @@ export interface PlanDoc { log: string[]; } -const GOAL_HEADER = /^##\s+Goal:\s*(.*)$/; +// Goal header carries the state checkbox: `## Goal: [x] subject`. The checkbox is optional so a +// header written without one parses as open (group 1 undefined -> " "). +const GOAL_HEADER = /^##\s+Goal:\s*(?:\[([ xX/-])\]\s+)?(.*)$/; const ANY_HEADER = /^#{1,6}\s/; const LOG_HEADER = /^##\s+Log\s*$/i; const ID_COMMENT = /^$/; const CHECKBOX = /^- \[([ xX])\]\s+(.*)$/; +const CHAR_TO_STATUS: Record = { " ": "open", "/": "active", x: "done", "-": "cancelled" }; +const STATUS_TO_CHAR: Record = { open: " ", active: "/", done: "x", cancelled: "-" }; + export function parse(text: string): PlanDoc { const lines = text.split("\n"); let objective = ""; @@ -62,13 +76,14 @@ export function parse(text: string): PlanDoc { const log: string[] = []; let cur: Goal | null = null; - let inFailureModes = false; + // While inside a `failure_modes:`/`evidence:` block, points at the list the "- " items append to. + let curList: string[] | null = null; let inLog = false; const flush = () => { if (cur) goals.push(cur); cur = null; - inFailureModes = false; + curList = null; }; for (const line of lines) { @@ -82,7 +97,8 @@ export function parse(text: string): PlanDoc { if (goalMatch) { flush(); inLog = false; - cur = { id: "", subject: goalMatch[1].trim(), status: "open", done_when: "", failure_modes: [], subtasks: [] }; + const status = CHAR_TO_STATUS[(goalMatch[1] ?? " ").toLowerCase()] ?? "open"; + cur = { id: "", subject: goalMatch[2].trim(), status, done_when: "", failure_modes: [], evidence: [], subtasks: [] }; continue; } @@ -112,32 +128,32 @@ export function parse(text: string): PlanDoc { continue; } - // A checkbox (column 0) is a subtask; checked first so it is never read as a failure mode. + // A checkbox (column 0) is a subtask; checked first so it is never read as a list item. 
const checkbox = CHECKBOX.exec(line); if (checkbox) { - inFailureModes = false; + curList = null; cur.subtasks.push({ done: checkbox[1].toLowerCase() === "x", text: checkbox[2].trim() }); continue; } - const kv = /^(status|done_when|verify|failure_modes)\s*:\s*(.*)$/.exec(line); + const kv = /^(done_when|verify|failure_modes|evidence)\s*:\s*(.*)$/.exec(line); if (kv) { const [, key, value] = kv; - if (key === "status") cur.status = value.trim() as GoalStatus; - else if (key === "done_when") cur.done_when = value.trim(); + if (key === "done_when") cur.done_when = value.trim(); else if (key === "verify") cur.verify = value.trim() || undefined; - else if (key === "failure_modes") inFailureModes = true; + // failure_modes/evidence open a "- " block; done_when/verify close any open one. + curList = key === "failure_modes" ? cur.failure_modes : key === "evidence" ? cur.evidence : null; continue; } - // Indented "- " items under failure_modes: (a column-0 checkbox already returned above). - if (inFailureModes) { - const fm = /^\s*-\s+(.*)$/.exec(line); - if (fm) { - cur.failure_modes.push(fm[1].trim()); + // Indented "- " items under failure_modes:/evidence: (a column-0 checkbox already returned above). + if (curList) { + const item = /^\s*-\s+(.*)$/.exec(line); + if (item) { + curList.push(item[1].trim()); continue; } - if (line.trim() !== "") inFailureModes = false; + if (line.trim() !== "") curList = null; } } flush(); @@ -159,20 +175,21 @@ export function counts(doc: PlanDoc): { done: number; open: number; active: numb return c; } -/** Flip a goal's `status:` line in place (the one write CompleteGoal needs). */ +/** Flip a goal's header checkbox in place (the one write CompleteGoal needs). Normalizes a header that + * lacks a checkbox by inserting one. 
*/ export function setGoalStatus(text: string, id: string, status: GoalStatus): string { const lines = text.split("\n"); - let i = lines.findIndex((l) => ID_COMMENT.test(l.trim()) && ID_COMMENT.exec(l.trim())?.[1] === id); - if (i === -1) throw new Error(`Goal #${id} not found`); - for (; i < lines.length; i++) { - if (i > 0 && ANY_HEADER.test(lines[i]) && !GOAL_HEADER.test(lines[i]) && !LOG_HEADER.test(lines[i])) break; - const kv = /^(status\s*:\s*)(.*)$/.exec(lines[i]); - if (kv) { - lines[i] = `${kv[1]}${status}`; + const idIdx = lines.findIndex((l) => ID_COMMENT.exec(l.trim())?.[1] === id); + if (idIdx === -1) throw new Error(`Goal #${id} not found`); + // The header sits just above the id comment; scan upward for it. + for (let i = idIdx; i >= 0; i--) { + const m = GOAL_HEADER.exec(lines[i]); + if (m) { + lines[i] = `## Goal: [${STATUS_TO_CHAR[status]}] ${m[2].trim()}`; return lines.join("\n"); } } - throw new Error(`Goal #${id} has no status: line`); + throw new Error(`Goal #${id} has no ## Goal: header`); } /** @@ -184,7 +201,7 @@ export type SignOff = | { kind: "rejected"; missing: string } | { kind: "accepted" }; -/** Apply a sign-off outcome to plan.md text: accept flips status + logs; reject only logs. Pure. */ +/** Apply a sign-off outcome to plan.md text: accept flips the header checkbox to [x] + logs; reject only logs. Pure. */ export function recordSignOff( text: string, goalId: string, diff --git a/src/prompts.ts b/src/prompts.ts index e6771e8..8ff6177 100644 --- a/src/prompts.ts +++ b/src/prompts.ts @@ -34,10 +34,10 @@ * front, because the human reviews this output before any execution. * ──────────────────────────────────────────────────────────────────────── */ export const planDrafting = `\ -You are in plan mode. Explore the repository read-only, then draft a plan into plan.md. +You are in plan mode. Explore the repository read-only, then draft goals into plan.md. Do not write or run code in this phase. 
Produce a plan the human will review and approve. -Right-size the plan, don't force structure that isn't there: +Right-size it, don't force structure that isn't there: - Default to ONE goal. Add another only when it's a genuinely separate checkpoint you'd want signed off on its own (its own done_when that can pass or fail independently). A long list of near-identical goals should be one goal with subtasks. Most objectives are 1-2 goals. @@ -45,26 +45,40 @@ Right-size the plan, don't force structure that isn't there: a single-action goal. Don't pad with trivial steps. - Don't invent phases to look thorough. When in doubt, merge. -Write each goal in this shape: +Write the whole file in this shape: -## Goal: -status: open +# Plan: + +## Goal: [ ] + done_when: verify: -failure_modes: - - - [ ] - [ ] +failure_modes: + - +evidence: + - + Keep it lean: +- The goal's state is the checkbox in its header: [ ] open, [/] active, [x] done, [-] cancelled. + Leave it [ ] at planning. Every goal needs its line; CompleteGoal finds goals by it. +- The subtask checklist comes right under the goal; failure_modes and the (empty) evidence block + sit at the end, after a blank line. Don't let the dash-lists run together. +- evidence stays empty at planning. You fill it when the goal is actually done, just before calling + CompleteGoal, with a "- " list pointing at real artifacts (files, saved logs, committed diffs). - done_when is ONE concrete, checkable condition, not a paragraph, no "if wrong" clause. The symptom of failure goes in failure_modes, not here. +- done_when names a real artifact: a file, a test result, a committed diff, a program's output. + Never write it about plan.md's own checkbox or ## Log: CompleteGoal writes those when it accepts, + so a done_when about them is circular and the sign-off can never pass. - failure_modes: 0-2 terse items, only the non-obvious ways a "done" could be wrong (a pre-mortem). 
If you add a verify command, one mode can be "verify passes on a gamed file". - subtasks: a short checklist of the real steps; omit them if the goal is a single action. - Prefer a verify command when success is a test/build/threshold. A green check beats prose. -When the plan is drafted, present it and stop for review. Do not begin execution.`; +When the goals are drafted, present them and stop for review. Do not begin execution.`; /* ───────────────────────────────────────────────────────────────────────── * 2. planInjection — EXEC, injected at each agent start (and after compaction) @@ -81,7 +95,7 @@ export function planInjection(p: { counts: { done: number; open: number }; }): string { if (!p.activeGoal) { - return `Plan (plan.md): ${p.objective}\nNo active goal. ${p.counts.open} open, ${p.counts.done} done. Pick the next goal or run /plan.`; + return `Plan (plan.md): ${p.objective}\nNo active goal. ${p.counts.open} open, ${p.counts.done} done. Pick the next goal (set its header to [/]) or run /plan.`; } const subtasks = p.activeGoal.openSubtasks.length ? p.activeGoal.openSubtasks.map((s) => ` - [ ] ${s}`).join("\n") @@ -109,8 +123,9 @@ export const reminder = `\ Keep plan.md current as you work: - tasks: tick the subtasks you've finished; add any new ones you've discovered. - log: append ONE short line to ## Log (append, don't rewrite earlier lines). -- goal: if the active goal's evidence is in, sign it off by calling CompleteGoal with that - evidence. Don't edit status to done by hand; CompleteGoal runs the check and records it. +- goal: when the active goal's done_when is met, fill its evidence: block in plan.md (a "- " list + pointing at durable artifacts), then call CompleteGoal with the goal_id. Don't tick the goal's + header [x] by hand; CompleteGoal reads the evidence, runs the check, and writes [x]. - otherwise: keep working toward the active goal. Don't stop to ask unless you're genuinely blocked; if blocked, say what's blocking and why. 
`; @@ -122,9 +137,9 @@ Keep plan.md current as you work: * continue. Does not mutate the system prompt, so the cache holds. * ──────────────────────────────────────────────────────────────────────── */ export const continuation = `\ -Continue toward the active goal in plan.md. If it now meets its done_when, call CompleteGoal -with your evidence (point to durable artifacts: saved logs, committed diffs, files, not just -claims). If you're blocked, state what's blocking it.`; +Continue toward the active goal in plan.md. If it now meets its done_when, fill the goal's +evidence: block (durable artifacts: saved logs, committed diffs, files, not just claims) and then +call CompleteGoal with the goal_id. If you're blocked, state what's blocking it.`; /* ───────────────────────────────────────────────────────────────────────── * 5. loopJudge — EXEC, runs after each turn to decide continue / pause diff --git a/test/plan-file.test.ts b/test/plan-file.test.ts index a0101a6..007e9a6 100644 --- a/test/plan-file.test.ts +++ b/test/plan-file.test.ts @@ -3,9 +3,8 @@ import { appendLog, counts, findGoal, parse, recordSignOff, setGoalStatus } from const SAMPLE = `# Plan: ship the cache layer -## Goal: Implement cache layer +## Goal: [/] Implement cache layer -status: active done_when: p95 < 50ms on bench-X. 
If wrong: timeouts in load-test.log verify: pytest tests/cache -q failure_modes: @@ -14,10 +13,12 @@ failure_modes: - [x] wire cache client - [ ] eviction policy - [ ] load test +evidence: + - load-test.log shows p95=41ms + - hit-rate 0.93 in load-test.log -## Goal: Document the API +## Goal: [ ] Document the API -status: open done_when: every public fn has a docstring; else sphinx warns failure_modes: - docstrings exist but are stale @@ -53,12 +54,13 @@ describe("parse", () => { expect(doc.goals.map((g) => g.id)).toEqual(["cache-layer-1", "document-the-api-1"]); }); - it("reads goal fields", () => { + it("reads goal fields, with status from the header checkbox", () => { const g = findGoal(doc, "cache-layer-1"); expect(g?.subject).toBe("Implement cache layer"); - expect(g?.status).toBe("active"); + expect(g?.status).toBe("active"); // from the [/] in the header expect(g?.done_when).toBe("p95 < 50ms on bench-X. If wrong: timeouts in load-test.log"); expect(g?.verify).toBe("pytest tests/cache -q"); + expect(findGoal(doc, "document-the-api-1")?.status).toBe("open"); // from [ ] }); it("separates failure_modes from subtasks", () => { @@ -72,6 +74,14 @@ describe("parse", () => { ]); }); + it("reads the evidence block, separate from failure_modes and subtasks", () => { + const g = findGoal(doc, "cache-layer-1"); + expect(g?.evidence).toEqual(["load-test.log shows p95=41ms", "hit-rate 0.93 in load-test.log"]); + expect(g?.failure_modes).toHaveLength(2); // unchanged by the evidence block that follows the subtasks + const g2 = findGoal(doc, "document-the-api-1"); + expect(g2?.evidence).toEqual([]); // a goal with no evidence block parses to [] + }); + it("reads the log verbatim and counts by status", () => { expect(doc.log).toEqual(["- 2026-06-15 14:02 cache client wired; eviction next"]); expect(counts(doc)).toEqual({ done: 0, open: 1, active: 1 }); @@ -81,7 +91,7 @@ describe("parse", () => { describe("failure_modes vs subtask disambiguation", () => { it("a column-0 
checkbox right after failure_modes: is a SUBTASK", () => { const doc = parse( - `# Plan: x\n\n## Goal: G\n\nstatus: open\ndone_when: z\nfailure_modes:\n- [ ] first subtask\n- [x] second subtask\n`, + `# Plan: x\n\n## Goal: [ ] G\n\ndone_when: z\nfailure_modes:\n- [ ] first subtask\n- [x] second subtask\n`, ); const g = findGoal(doc, "g-1"); expect(g?.failure_modes).toEqual([]); @@ -93,7 +103,7 @@ describe("failure_modes vs subtask disambiguation", () => { it("an indented checkbox-shaped item inside failure_modes is a FAILURE MODE", () => { const doc = parse( - `# Plan: x\n\n## Goal: G\n\nstatus: open\ndone_when: z\nfailure_modes:\n - [ ] prose that looks like a checkbox\n- [ ] real subtask\n`, + `# Plan: x\n\n## Goal: [ ] G\n\ndone_when: z\nfailure_modes:\n - [ ] prose that looks like a checkbox\n- [ ] real subtask\n`, ); const g = findGoal(doc, "g-2"); expect(g?.failure_modes).toEqual(["[ ] prose that looks like a checkbox"]); @@ -101,7 +111,7 @@ describe("failure_modes vs subtask disambiguation", () => { }); it("a goal with no failure_modes keeps its subtasks", () => { - const doc = parse(`# Plan: x\n\n## Goal: G\n\nstatus: open\ndone_when: z\n- [ ] only subtask\n`); + const doc = parse(`# Plan: x\n\n## Goal: [ ] G\n\ndone_when: z\n- [ ] only subtask\n`); const g = findGoal(doc, "g-3"); expect(g?.failure_modes).toEqual([]); expect(g?.subtasks).toEqual([{ text: "only subtask", done: false }]); @@ -122,6 +132,11 @@ describe("the two CompleteGoal writes (minimal diff)", () => { expect(findGoal(parse(next), "document-the-api-1")?.status).toBe("active"); }); + it("setGoalStatus writes the checkbox char into the header line", () => { + expect(setGoalStatus(SAMPLE, "cache-layer-1", "done")).toContain("## Goal: [x] Implement cache layer"); + expect(setGoalStatus(SAMPLE, "document-the-api-1", "cancelled")).toContain("## Goal: [-] Document the API"); + }); + it("appendLog adds exactly one line under ## Log", () => { const next = appendLog(SAMPLE, "2026-06-15 15:00 eviction 
done"); expect(lineDelta(SAMPLE, next)).toEqual({ added: 1, removed: 0 }); @@ -132,7 +147,7 @@ describe("the two CompleteGoal writes (minimal diff)", () => { }); it("appendLog creates the section when absent", () => { - const noLog = "# Plan: x\n\n## Goal: y\n\nstatus: open\ndone_when: z\n"; + const noLog = "# Plan: x\n\n## Goal: [ ] y\n\ndone_when: z\n"; expect(parse(appendLog(noLog, "first entry")).log).toEqual(["- first entry"]); }); });