fix: revert to XML dcp-id tags and add strip-before-inject to prevent echo loop

HTML comment format offered no benefit over XML tags since LLMs see raw
text, not rendered output. Revert to original <dcp-id>m001</dcp-id> and
<dcp-block-id>bN</dcp-block-id> format which is more token-efficient.

Add strip-before-inject in injectMessageIds: each context event strips any
existing dcp-id tags before appending the fresh one. For clean messages
this is idempotent (no cache bust). For model-echoed tags it removes the
duplicate on the next context event, breaking the accumulation loop that
reinforced echoing.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
wassname
2026-04-19 13:59:46 +08:00
parent b83b3f8c53
commit b03651855d
2 changed files with 43 additions and 19 deletions
+3 -3
View File
@@ -13,7 +13,7 @@ You operate in a context-constrained environment. Manage context continuously to
The ONLY tool you have for context management is \`compress\`. It replaces older conversation content with technical summaries you produce.
\`<!-- dcp-id: ... -->\` and \`<dcp-system-reminder>\` tags are environment-injected metadata. Do not output them.
\`<dcp-id>\` and \`<dcp-system-reminder>\` tags are environment-injected metadata. Do not output them.
THE PHILOSOPHY OF COMPRESS
\`compress\` transforms conversation content into dense, high-fidelity summaries. This is not cleanup — it is crystallization. Your summary becomes the authoritative record of what transpired.
@@ -114,7 +114,7 @@ You specify boundaries by ID using the injected IDs visible in the conversation:
- \`mNNN\` IDs identify raw messages (3 digits, zero-padded, e.g. \`m001\`, \`m042\`)
- \`bN\` IDs identify previously compressed blocks
Each message has an ID inside an HTML comment like \`<!-- dcp-id: m001 -->\`.
Each message has an ID tag like \`<dcp-id>m001</dcp-id>\`.
The ID tag appears at the end of the message it belongs to — it identifies the message above it, not the one below it.
Treat these tags as boundary metadata only, not as tool result content.
@@ -215,7 +215,7 @@ Prefer multiple short, closed ranges over one large range when several independe
export const MANUAL_MODE_SYSTEM_PROMPT = `
You are operating in DCP manual mode for context management.
\`<!-- dcp-id: ... -->\` and \`<dcp-system-reminder>\` tags are environment-injected metadata. Do not output them.
\`<dcp-id>\` and \`<dcp-system-reminder>\` tags are environment-injected metadata. Do not output them.
In manual mode you do NOT proactively compress conversation content. Compression is a deliberate, user-directed action.
+40 -16
View File
@@ -151,9 +151,9 @@ function applyCompressionBlocks(messages: any[], state: DcpState): any[] {
block.topic +
"]\n\n" +
block.summary +
"\n\n<!-- dcp-block-id: b" +
"\n\n<dcp-block-id>b" +
block.id +
" -->",
"</dcp-block-id>",
},
],
// anchorTimestamp is always finite (resolveAnchorTimestamp returns
@@ -335,9 +335,27 @@ function applyToolOutputPruning(messages: any[], state: DcpState): void {
}
}
/**
 * Remove every `<dcp-id>…</dcp-id>` tag from a string.
 *
 * At most one newline directly in front of each tag is removed together with
 * the tag. Text that previously had `"\n<dcp-id>m001</dcp-id>"` appended is
 * therefore restored exactly, and text that contains no tag is returned
 * unchanged.
 *
 * Tags are matched wherever they occur, including at the very start of the
 * string or in the middle of a line, so a copy that is not preceded by a
 * newline is removed as well.
 *
 * @param content - Text that may contain zero or more dcp-id tags.
 * @returns `content` with all dcp-id tags (and one leading newline each) removed.
 */
function stripDcpIdTags(content: string): string {
  // `[^\s<]+` keeps the ID from running past its own closing tag. A greedy
  // `\S+` would match from the first opener to the LAST closer on the same
  // whitespace-free run and delete any real text sitting between two tags.
  return content.replace(/\n?<dcp-id>[^\s<]+<\/dcp-id>/g, "");
}
/**
 * Test whether a content block is a text block holding nothing but dcp-id
 * tag(s) and surrounding whitespace (used as an array-filter predicate).
 *
 * A block that mixes real text with a tag is deliberately NOT matched:
 * filtering with this predicate removes the whole block, so matching such a
 * block would silently discard its text along with the tag.
 *
 * @param block - A message content block; any value is accepted.
 * @returns `true` only for `{ type: "text" }` blocks whose `text` consists
 *   solely of one or more `<dcp-id>…</dcp-id>` tags.
 */
function isDcpIdBlock(block: any): boolean {
  // Guard against null/undefined entries and text blocks without a string
  // payload so the predicate never throws inside `filter`.
  if (block == null || block.type !== "text" || typeof block.text !== "string") {
    return false;
  }
  return /^\s*(?:<dcp-id>[^\s<]+<\/dcp-id>\s*)+$/.test(block.text);
}
/**
* Inject sequential message IDs into eligible messages.
* Updates state.messageIdSnapshot.
*
* Strip-before-inject: always strips existing dcp-id tags before appending
* the fresh one. For messages that were never echoed this is idempotent
* (same result, no cache bust). For messages with model-echoed tags it
* removes the duplicate, breaking the accumulation loop.
*/
function injectMessageIds(messages: any[], state: DcpState): void {
// Clear the snapshot and rebuild
@@ -356,42 +374,48 @@ function injectMessageIds(messages: any[], state: DcpState): void {
const id = "m" + String(counter).padStart(3, "0");
counter++;
const idTag = `\n<!-- dcp-id: ${id} -->`;
const idTag = `\n<dcp-id>${id}</dcp-id>`;
if (role === "user") {
if (typeof msg.content === "string") {
msg.content = msg.content + `\n\n<!-- dcp-id: ${id} -->`;
msg.content = stripDcpIdTags(msg.content) + `\n\n<dcp-id>${id}</dcp-id>`;
} else if (Array.isArray(msg.content)) {
msg.content = [...msg.content, { type: "text", text: idTag }];
msg.content = [
...msg.content.filter((b: any) => !isDcpIdBlock(b)),
{ type: "text", text: idTag },
];
}
} else if (role === "toolResult" || role === "bashExecution") {
if (Array.isArray(msg.content)) {
msg.content = [...msg.content, { type: "text", text: idTag }];
msg.content = [
...msg.content.filter((b: any) => !isDcpIdBlock(b)),
{ type: "text", text: idTag },
];
} else if (typeof msg.content === "string") {
msg.content = msg.content + idTag;
msg.content = stripDcpIdTags(msg.content) + idTag;
}
} else if (role === "assistant") {
if (Array.isArray(msg.content)) {
// Insert the ID tag before any tool_use (toolCall) blocks.
// Strip echoed tags first, then insert before any tool_use (toolCall) blocks.
// Anthropic requires: thinking → text → tool_use.
// Appending after tool_use blocks violates that constraint.
const firstToolCallIdx = msg.content.findIndex(
const stripped = msg.content.filter(
(b: any) => !isDcpIdBlock(b)
);
const firstToolCallIdx = stripped.findIndex(
(b: any) => b.type === "toolCall",
);
const idBlock = { type: "text", text: idTag };
if (firstToolCallIdx === -1) {
// No tool_use blocks — append as usual
msg.content = [...msg.content, idBlock];
msg.content = [...stripped, idBlock];
} else {
// Insert immediately before the first tool_use block
msg.content = [
...msg.content.slice(0, firstToolCallIdx),
...stripped.slice(0, firstToolCallIdx),
idBlock,
...msg.content.slice(firstToolCallIdx),
...stripped.slice(firstToolCallIdx),
];
}
} else if (typeof msg.content === "string") {
msg.content = msg.content + idTag;
msg.content = stripDcpIdTags(msg.content) + idTag;
}
}