From c74f625b65e87d5a37790caf94c8aafc4a11a2b6 Mon Sep 17 00:00:00 2001 From: wassname Date: Mon, 15 Jun 2026 18:40:19 +0800 Subject: [PATCH] Fix newSession context bug + mark unwired loop prompts (external review) External review (deepseek-v4-pro, docs/reviews/review.md) found: - Real bug: reviewLoop cast agent_end's ExtensionContext to ExtensionCommandContext and called newSession on it, which would crash the "fresh context" path. Now the /plan command handler's context (which has newSession) is saved and reused, with a graceful in-place fallback (burneikis pattern). - Dead code: continuation + loopJudge prompts are unused (the autonomous loop is intentionally cut). Marked NOT-YET-WIRED in the prompts.ts flow header rather than removed, so the full intended flow stays reviewable. Review also confirmed: no comment bloat, no over-engineering. Co-Authored-By: Claudypoo <288921227+claudypoo@users.noreply.github.com> --- .gitignore | 2 ++ docs/reviews/review.md | 62 ++++++++++++++++++++++++++++++++++++++++++ src/index.ts | 19 +++++++------ src/prompts.ts | 5 ++++ 4 files changed, 80 insertions(+), 8 deletions(-) create mode 100644 docs/reviews/review.md diff --git a/.gitignore b/.gitignore index 3c25e1e..c3d6dea 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,5 @@ node_modules/ dist/ *.log +docs/reviews/raw.jsonl +docs/reviews/err.txt diff --git a/docs/reviews/review.md b/docs/reviews/review.md new file mode 100644 index 0000000..e385d18 --- /dev/null +++ b/docs/reviews/review.md @@ -0,0 +1,62 @@ +Code review against spec `docs/spec/2026-06-15_pi-plan.md`. + +--- + +### (A) SPEC MISMATCH — code does not match spec intent + +1. **No loop judge** (spec §9, §3b). The extension lacks any per‑turn evaluation that would decide continue/pause; the loop‑judge prompt (`loopJudgeSystem`, `loopJudgeUser`) is defined but never invoked. No motion. + +2. **`/goal` command missing** (spec §7). No handler for `/goal` (restart loop, pause, resume, clear, status). The only command is `/plan`. + +3. **`/subgoal` command missing** (spec §7). Not implemented. + +4. **`CancelGoal` tool not implemented** (spec §5, optional but present in spec). Not a blocker but a gap. + +5. **Plan‑phase model selection (D12) not implemented**. `planDrafting` always runs on the default model; there is no sticky per‑phase model choice, no selection menu, and no persisting of a plan‑phase model reference. + +6. **Widget does not flag `done` goals that lack a sign‑off log line** (spec §7, §6). The widget hides all done goals unconditionally; the visibility guard is missing. + +7. **`/plan` (no args) does not render the task‑list widget** (spec §7). `showPlan()` dumps raw file content via `notify`; the widget is only set through `updateWidget()` on other events, not by the command itself. + +8. **Injection message role** (spec §11). The `before_agent_start` hook returns a `customType` message with `display: false`. The spec demands a **late user‑role message** to avoid system‑prompt mutation; the actual message role depends on the pi API and may be system, not user, risking cache breakage. + +9. **Missing pre‑compact hook** (spec §8). No `pre‑compact` hook to flush any in‑memory state (even just ensuring `plan.md` is up‑to‑date) before compaction. + +10. **Reminder cadence deviates** (spec §8a). The spec calls for firing after N file‑modifying turns since last `plan.md` update. The code fires if `plan.md` is byte‑identical between agent starts, which is a coarser proxy. + +--- + +### (B) DEAD/UNUSED CODE + +| File | Lines | Reason | +|------|-------|--------| +| `src/prompts.ts` | 128‑146 | `loopJudgeSystem` and `loopJudgeUser` exported but never used. | +| `src/prompts.ts` | 115‑118 | `continuation` exported but never used (the loop is not built). | + +--- + +### (C) OVERLY LONG OR REDUNDANT COMMENTS + +The file‑header comments in `index.ts` (lines 1‑20) and `plan‑file.ts` (lines 1‑26) are fairly concise descriptions of the design; they are not excessive. **No comment bloat worth flagging.** + +--- + +### (D) OVER‑ENGINEERING vs. “super simple” goal + +None. The line‑scanner in `plan‑file.ts` is minimal; the `getPiInvocation()` helper is a straightforward copy from the oracle extension; no unnecessary abstraction or defensive layers. + +--- + +### (E) REAL BUGS + +- **`cmdCtx.newSession` cast risk** (src/index.ts:272, 201). + `reviewLoop` casts `ctx` (type `ExtensionContext`) to `ExtensionCommandContext` to pass to `startExecution`, which calls `cmdCtx.newSession(...)`. If the concrete context does not carry that method, it fails at runtime. (In practice the same object may satisfy it, but the cast hides the truth.) + +- **`showPlan` raw content instead of widget** (src/index.ts:136‑143). + `/plan` with no arguments shows the file content via `ctx.ui.notify`, not the structured task‑list widget the spec expects. The widget is rendered separately via `updateWidget`, but the command does not trigger it, so the output is inconsistent. + +No other obvious logic errors; the sign‑off flow, logging, and parsing work as intended. + +--- + +**Verdict:** A clean scaffold for the sign‑off path, but missing the autonomous loop, `/goal` command, and plan‑phase model selection means it’s not yet the “work autonomously” extension the spec describes. \ No newline at end of file diff --git a/src/index.ts b/src/index.ts index 8f911a4..e02c32a 100644 --- a/src/index.ts +++ b/src/index.ts @@ -44,6 +44,8 @@ export default function piPlanExtension(pi: ExtensionAPI): void { let state: PlanState = { isPlanMode: false, objective: null, judgeModel: null }; // Reminder cadence: fire when an active goal exists but plan.md was not touched since last turn. let lastInjectedPlan = ""; + // newSession is only on the command-handler context; agent_end's ctx lacks it. Save it from /plan. + let savedCmdCtx: ExtensionCommandContext | null = null; const planPath = (ctx: ExtensionContext) => join(ctx.cwd, "plan.md"); const readPlan = (ctx: ExtensionContext): string => (existsSync(planPath(ctx)) ? readFileSync(planPath(ctx), "utf-8") : ""); @@ -87,6 +89,7 @@ export default function piPlanExtension(pi: ExtensionAPI): void { pi.registerCommand("plan", { description: "Plan mode: set up goals (with evidence) in plan.md, then work them. /plan ", handler: async (args, ctx) => { + savedCmdCtx = ctx; // ctx here is an ExtensionCommandContext (has newSession); keep it for later const arg = args.trim(); if (arg === "clear") { await clearPlan(ctx); @@ -144,7 +147,7 @@ export default function piPlanExtension(pi: ExtensionAPI): void { // --- review loop (after the agent drafts the plan) -------------------------------------------- - async function reviewLoop(ctx: ExtensionContext, cmdCtx: ExtensionCommandContext): Promise { + async function reviewLoop(ctx: ExtensionContext): Promise { while (true) { const doc = parse(readPlan(ctx)); const choice = await ctx.ui.select(`Plan: ${doc.goals.length} goal(s). What next?`, [ @@ -158,7 +161,7 @@ export default function piPlanExtension(pi: ExtensionAPI): void { ctx.ui.notify("Left plan mode. plan.md kept.", "info"); return; } - if (choice.startsWith("Ready")) return startExecution(ctx, cmdCtx); + if (choice.startsWith("Ready")) return startExecution(ctx); if (choice.startsWith("Edit")) { const changes = await ctx.ui.editor("What should change about the plan?", ""); if (changes?.trim()) { @@ -180,10 +183,10 @@ export default function piPlanExtension(pi: ExtensionAPI): void { updateWidget(ctx); } - async function startExecution(ctx: ExtensionContext, cmdCtx: ExtensionCommandContext): Promise { - // Offer a clean execution context (D13): some runs want the fresh handoff, some want to keep it. + async function startExecution(ctx: ExtensionContext): Promise { + // Offer a clean execution context (D13). newSession lives only on the saved command context. let fresh = false; - if (ctx.hasUI) { + if (ctx.hasUI && savedCmdCtx) { const choice = await ctx.ui.select("Start working the plan in...", [ "This context (keep history)", "A fresh, compacted context", @@ -194,8 +197,8 @@ export default function piPlanExtension(pi: ExtensionAPI): void { const doc = parse(readPlan(ctx)); if (doc.objective) pi.setSessionName(`Plan: ${doc.objective}`); - if (fresh) { - const result = await cmdCtx.newSession({ parentSession: ctx.sessionManager.getSessionFile() }); + if (fresh && savedCmdCtx) { + const result = await savedCmdCtx.newSession({ parentSession: ctx.sessionManager.getSessionFile() }); if (result.cancelled) { ctx.ui.notify("Execution cancelled.", "warning"); return; @@ -269,7 +272,7 @@ export default function piPlanExtension(pi: ExtensionAPI): void { ctx.ui.notify("No goals found in plan.md yet — ask the agent to draft them.", "warning"); return; } - await reviewLoop(ctx, ctx as ExtensionCommandContext); + await reviewLoop(ctx); }); // Keep only the freshest injected plan summary; strip stale ones so history does not bloat and diff --git a/src/prompts.ts b/src/prompts.ts index 2915771..383660b 100644 --- a/src/prompts.ts +++ b/src/prompts.ts @@ -17,6 +17,11 @@ * * Read top to bottom to see the whole process. 5 and 6 are kept adjacent on * purpose: the cheap-foolable vs must-not-be-fooled contrast is the design. + * + * WIRED in index.ts: 1 planDrafting, 2 planInjection, 3 reminder, 6 evidenceJudge. + * NOT YET WIRED: 4 continuation and 5 loopJudge define the autonomous re-prompt loop, which is + * intentionally not built in v1 (an until-done-style loop was judged too complex). They stay here so + * the full intended flow is reviewable; wire them if/when the loop is added. */ /* ─────────────────────────────────────────────────────────────────────────