fix: revert to XML dcp-id tags and add strip-before-inject to prevent echo loop

The HTML comment format offered no benefit over XML tags, since LLMs see raw text rather than rendered output. Revert to the original <dcp-id>m001</dcp-id> and <dcp-block-id>bN</dcp-block-id> format, which is more token-efficient.

Add strip-before-inject in injectMessageIds: on each context event, any existing dcp-id tags are stripped before the fresh one is appended. For clean messages this is idempotent (no cache bust). For model-echoed tags it removes the duplicate on the next context event, breaking the accumulation loop that reinforced echoing.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-06-27 14:45:37 +08:00 · 2026-04-19 13:59:46 +08:00
parent b83b3f8c53
commit b03651855d
2 changed files with 43 additions and 19 deletions
@@ -13,7 +13,7 @@ You operate in a context-constrained environment. Manage context continuously to

 The ONLY tool you have for context management is \`compress\`. It replaces older conversation content with technical summaries you produce.

-\`<!-- dcp-id: ... -->\` and \`<dcp-system-reminder>\` tags are environment-injected metadata. Do not output them.
+\`<dcp-id>\` and \`<dcp-system-reminder>\` tags are environment-injected metadata. Do not output them.

 THE PHILOSOPHY OF COMPRESS
 \`compress\` transforms conversation content into dense, high-fidelity summaries. This is not cleanup — it is crystallization. Your summary becomes the authoritative record of what transpired.
@@ -114,7 +114,7 @@ You specify boundaries by ID using the injected IDs visible in the conversation:
 - \`mNNN\` IDs identify raw messages (3 digits, zero-padded, e.g. \`m001\`, \`m042\`)
 - \`bN\` IDs identify previously compressed blocks

-Each message has an ID inside an HTML comment like \`<!-- dcp-id: m001 -->\`.
+Each message has an ID tag like \`<dcp-id>m001</dcp-id>\`.
 The ID tag appears at the end of the message it belongs to — it identifies the message above it, not the one below it.
 Treat these tags as boundary metadata only, not as tool result content.

@@ -215,7 +215,7 @@ Prefer multiple short, closed ranges over one large range when several independe
 export const MANUAL_MODE_SYSTEM_PROMPT = `
 You are operating in DCP manual mode for context management.

-\`<!-- dcp-id: ... -->\` and \`<dcp-system-reminder>\` tags are environment-injected metadata. Do not output them.
+\`<dcp-id>\` and \`<dcp-system-reminder>\` tags are environment-injected metadata. Do not output them.

 In manual mode you do NOT proactively compress conversation content. Compression is a deliberate, user-directed action.

@@ -151,9 +151,9 @@ function applyCompressionBlocks(messages: any[], state: DcpState): any[] {
            block.topic +
            "]\n\n" +
            block.summary +
-            "\n\n<!-- dcp-block-id: b" +
+            "\n\n<dcp-block-id>b" +
            block.id +
-            " -->",
+            "</dcp-block-id>",
        },
      ],
      // anchorTimestamp is always finite (resolveAnchorTimestamp returns
@@ -335,9 +335,27 @@ function applyToolOutputPruning(messages: any[], state: DcpState): void {
  }
 }

+/**
+ * Strip any existing dcp-id tags from a string, so strip+inject is idempotent
+ * for clean messages (no cache bust) and removes model-echoed copies.
+ */
+function stripDcpIdTags(content: string): string {
+  return content.replace(/\n<dcp-id>\S+<\/dcp-id>/g, "");
+}
+
+/** Test whether a text block contains a dcp-id tag (for array filtering). */
+function isDcpIdBlock(block: any): boolean {
+  return block.type === "text" && /\n<dcp-id>\S+<\/dcp-id>/.test(block.text);
+}
+
 /**
 * Inject sequential message IDs into eligible messages.
 * Updates state.messageIdSnapshot.
+ *
+ * Strip-before-inject: always strips existing dcp-id tags before appending
+ * the fresh one. For messages that were never echoed this is idempotent
+ * (same result, no cache bust). For messages with model-echoed tags it
+ * removes the duplicate, breaking the accumulation loop.
 */
 function injectMessageIds(messages: any[], state: DcpState): void {
  // Clear the snapshot and rebuild
@@ -356,42 +374,48 @@ function injectMessageIds(messages: any[], state: DcpState): void {
    const id = "m" + String(counter).padStart(3, "0");
    counter++;

-    const idTag = `\n<!-- dcp-id: ${id} -->`;
+    const idTag = `\n<dcp-id>${id}</dcp-id>`;

    if (role === "user") {
      if (typeof msg.content === "string") {
-        msg.content = msg.content + `\n\n<!-- dcp-id: ${id} -->`;
+        msg.content = stripDcpIdTags(msg.content) + `\n\n<dcp-id>${id}</dcp-id>`;
      } else if (Array.isArray(msg.content)) {
-        msg.content = [...msg.content, { type: "text", text: idTag }];
+        msg.content = [
+          ...msg.content.filter((b: any) => !isDcpIdBlock(b)),
+          { type: "text", text: idTag },
+        ];
      }
    } else if (role === "toolResult" || role === "bashExecution") {
      if (Array.isArray(msg.content)) {
-        msg.content = [...msg.content, { type: "text", text: idTag }];
+        msg.content = [
+          ...msg.content.filter((b: any) => !isDcpIdBlock(b)),
+          { type: "text", text: idTag },
+        ];
      } else if (typeof msg.content === "string") {
-        msg.content = msg.content + idTag;
+        msg.content = stripDcpIdTags(msg.content) + idTag;
      }
    } else if (role === "assistant") {
      if (Array.isArray(msg.content)) {
-        // Insert the ID tag before any tool_use (toolCall) blocks.
+        // Strip echoed tags first, then insert before any tool_use (toolCall) blocks.
        // Anthropic requires: thinking → text → tool_use.
-        // Appending after tool_use blocks violates that constraint.
-        const firstToolCallIdx = msg.content.findIndex(
+        const stripped = msg.content.filter(
+          (b: any) => !isDcpIdBlock(b)
+        );
+        const firstToolCallIdx = stripped.findIndex(
          (b: any) => b.type === "toolCall",
        );
        const idBlock = { type: "text", text: idTag };
        if (firstToolCallIdx === -1) {
-          // No tool_use blocks — append as usual
-          msg.content = [...msg.content, idBlock];
+          msg.content = [...stripped, idBlock];
        } else {
-          // Insert immediately before the first tool_use block
          msg.content = [
-            ...msg.content.slice(0, firstToolCallIdx),
+            ...stripped.slice(0, firstToolCallIdx),
            idBlock,
-            ...msg.content.slice(firstToolCallIdx),
+            ...stripped.slice(firstToolCallIdx),
          ];
        }
      } else if (typeof msg.content === "string") {
-        msg.content = msg.content + idTag;
+        msg.content = stripDcpIdTags(msg.content) + idTag;
      }
    }