feat: add append_diagram tool for truncation continuation

When LLM output hits maxOutputTokens mid-generation, the system no longer fails into an error loop. Instead, it now:

1. Detects truncation (missing </root> in the XML)
2. Stores the partial XML and tells the LLM to use the new append_diagram tool
3. Lets the LLM continue generating from where it stopped
4. Accumulates fragments until the XML is complete
5. Is bounded by the server, which limits a request to 5 steps via stepCountIs(5)

Key changes:

- Add append_diagram tool definition in route.ts
- Add append_diagram handler in chat-panel.tsx
- Track continuation mode separately from error mode
- Continuation mode has unlimited retries (not counted against the auto-retry limit)
- Error mode is still limited to MAX_AUTO_RETRY_COUNT (1)
- Update system prompts to document the append_diagram tool
2026-01-02 22:32:27 +08:00 · 2025-12-14 09:38:47 +09:00
parent b33e09be05
commit 62e07f5f9c
5 changed files with 346 additions and 39 deletions
--- a/app/api/chat/route.ts
+++ b/app/api/chat/route.ts
@@ -3,10 +3,12 @@ import {
    convertToModelMessages,
    createUIMessageStream,
    createUIMessageStreamResponse,
    InvalidToolInputError,
    LoadAPIKeyError,
    stepCountIs,
    streamText,
 } from "ai"
 import { jsonrepair } from "jsonrepair"
 import { z } from "zod"
 import { getAIModel, supportsPromptCaching } from "@/lib/ai-providers"
 import { findCachedResponse } from "@/lib/cached-responses"
@@ -320,6 +322,31 @@ ${userInputText}
            maxOutputTokens: parseInt(process.env.MAX_OUTPUT_TOKENS, 10),
        }),
        stopWhen: stepCountIs(5),
        // Repair truncated tool calls when maxOutputTokens is reached mid-JSON.
        // Returns a copy of the tool call whose `input` is the jsonrepair output,
        // or null when the error is of a different kind or the repair itself throws.
        experimental_repairToolCall: async ({ toolCall, error }) => {
            // Only attempt repair for invalid tool input (broken JSON from truncation).
            // The `error.name` comparison is a fallback for cases where the
            // instanceof check does not match (presumably when more than one copy
            // of the "ai" package is loaded - TODO confirm).
            if (
                error instanceof InvalidToolInputError ||
                error.name === "AI_InvalidToolInputError"
            ) {
                try {
                    // Use jsonrepair to fix truncated JSON
                    const repairedInput = jsonrepair(toolCall.input)
                    console.log(
                        `[repairToolCall] Repaired truncated JSON for tool: ${toolCall.toolName}`,
                    )
                    // Keep every other field of the tool call; only the input is replaced
                    return { ...toolCall, input: repairedInput }
                } catch (repairError) {
                    // jsonrepair threw: log the failure and give up on this tool call
                    console.warn(
                        `[repairToolCall] Failed to repair JSON for tool: ${toolCall.toolName}`,
                        repairError,
                    )
                    return null
                }
            }
            // Don't attempt to repair other errors (like NoSuchToolError)
            return null
        },
        messages: allMessages,
        ...(providerOptions && { providerOptions }), // This now includes all reasoning configs
        ...(headers && { headers }),
@@ -411,6 +438,26 @@ IMPORTANT: Keep edits concise:
                        ),
                }),
            },
            // Tool the model calls to continue a display_diagram output that was
            // truncated. Only a description and an input schema are declared here;
            // the input is a single string `xml` holding the continuation fragment.
            append_diagram: {
                description: `Continue generating diagram XML when previous display_diagram output was truncated due to length limits.
 WHEN TO USE: Only call this tool after display_diagram was truncated (you'll see an error message about truncation).
 CRITICAL INSTRUCTIONS:
 1. Do NOT include <mxGraphModel> or <root> tags - they already exist in the partial
 2. Continue from EXACTLY where your previous output stopped
 3. Generate the remaining XML including closing tags </root></mxGraphModel>
 4. If still truncated, call append_diagram again with the next fragment
 Example: If previous output ended with '<mxCell id="x" style="rounded=1', continue with ';" vertex="1">...' and complete the remaining elements.`,
                inputSchema: z.object({
                    xml: z
                        .string()
                        .describe(
                            "Continuation XML fragment to append (NO wrapper tags)",
                        ),
                }),
            },
        },
        ...(process.env.TEMPERATURE !== undefined && {
            temperature: parseFloat(process.env.TEMPERATURE),
@@ -435,6 +482,7 @@ IMPORTANT: Keep edits concise:
                return {
                    inputTokens: totalInputTokens,
                    outputTokens: usage.outputTokens ?? 0,
                    finishReason: (part as any).finishReason,
                }
            }
            return undefined
--- a/components/chat-panel.tsx
+++ b/components/chat-panel.tsx
@@ -67,7 +67,7 @@ const MAX_AUTO_RETRY_COUNT = 1
 /**
 * Check if auto-resubmit should happen based on tool errors.
- * Does NOT handle retry count or quota - those are handled by the caller.
+ * Only checks the LAST tool part (most recent tool call), not all tool parts.
 */
 function hasToolErrors(messages: ChatMessage[]): boolean {
    const lastMessage = messages[messages.length - 1]
@@ -84,7 +84,12 @@ function hasToolErrors(messages: ChatMessage[]): boolean {
        return false
    }
-    return toolParts.some((part) => part.state === TOOL_ERROR_STATE)
+    const lastToolPart = toolParts[toolParts.length - 1]
    const hasError = lastToolPart?.state === TOOL_ERROR_STATE
    console.log(
        `[hasToolErrors] lastToolPart state: ${lastToolPart?.state}, hasError: ${hasError}`,
    )
    return hasError
 }
 export default function ChatPanel({
@@ -192,6 +197,13 @@ export default function ChatPanel({
    // Ref to track consecutive auto-retry count (reset on user action)
    const autoRetryCountRef = useRef(0)
    // Ref to accumulate partial XML when output is truncated due to maxOutputTokens
    const partialXmlRef = useRef<string>("")
    // Ref to track if we're in continuation mode (truncation, not error)
    // This allows unlimited retries for continuation vs limited for errors
    const isContinuationModeRef = useRef(false)
    // Persist processed tool call IDs so collapsing the chat doesn't replay old tool outputs
    const processedToolCallsRef = useRef<Set<string>>(new Set())
@@ -216,14 +228,83 @@ export default function ChatPanel({
            if (toolCall.toolName === "display_diagram") {
                const { xml } = toolCall.input as { xml: string }
-                if (DEBUG) {
+
                // Always log truncation-related info for debugging
                console.log(`[display_diagram] === TRUNCATION DEBUG ===`)
                console.log(
                    `[display_diagram] Received XML length: ${xml.length}`,
                )
                console.log(
                    `[display_diagram] XML starts with: ${xml.substring(0, 100)}...`,
                )
                console.log(
                    `[display_diagram] XML ends with: ...${xml.substring(xml.length - 100)}`,
                )
                console.log(
                    `[display_diagram] Has </root>: ${xml.includes("</root>")}`,
                )
                console.log(
                    `[display_diagram] partialXmlRef.current length: ${partialXmlRef.current.length}`,
                )
                // Check if XML is truncated (missing </root> indicates incomplete output)
                // This happens when maxOutputTokens is reached mid-generation
                const isTruncated =
                    !xml.includes("</root>") && !xml.trim().endsWith("/>")
                // Check if this is a fresh start vs a continuation
                // Fresh start indicators: <mxGraphModel>, <root>, or <mxCell id="0"
                const isFreshStart =
                    xml.trim().startsWith("<mxGraphModel") ||
                    xml.trim().startsWith("<root") ||
                    xml.trim().startsWith('<mxCell id="0"')
                const hadPreviousPartial = partialXmlRef.current.length > 0
                console.log(`[display_diagram] isTruncated: ${isTruncated}`)
                console.log(`[display_diagram] isFreshStart: ${isFreshStart}`)
                console.log(
                    `[display_diagram] hadPreviousPartial: ${hadPreviousPartial}`,
                )
                if (isTruncated) {
                    // Store the partial XML for continuation via append_diagram
                    // Always reset to current xml since this is the first truncation
                    partialXmlRef.current = xml
                    isContinuationModeRef.current = true // Mark as continuation (not error)
                    console.log(
-                        `[display_diagram] Received XML length: ${xml.length}`,
+                        `[display_diagram] XML truncated (${xml.length} chars). Instructing LLM to use append_diagram.`,
                    )
                    // Tell LLM to use append_diagram to continue
                    // Use "output-error" to trigger auto-retry, but isContinuationModeRef tracks it's not a real error
                    const partialEnding = partialXmlRef.current.slice(-500)
                    addToolOutput({
                        tool: "display_diagram",
                        toolCallId: toolCall.toolCallId,
                        state: "output-error",
                        errorText: `Output was truncated due to length limits. Use the append_diagram tool to continue.
 Your output ended with:
 \`\`\`
 ${partialEnding}
 \`\`\`
 NEXT STEP: Call append_diagram with the continuation XML.
 - Do NOT include <mxGraphModel> or <root> tags
 - Start from EXACTLY where you stopped
 - Continue until complete with </root></mxGraphModel>`,
                    })
                    return
                }
                // Complete XML received - use it directly
                // (continuation is now handled via append_diagram tool)
                const finalXml = xml
                partialXmlRef.current = "" // Reset any partial from previous truncation
                // Wrap raw XML with full mxfile structure for draw.io
-                const fullXml = wrapWithMxFile(xml)
+                const fullXml = wrapWithMxFile(finalXml)
                // loadDiagram validates and returns error if invalid
                const validationError = onDisplayChart(fullXml)
@@ -249,7 +330,7 @@ Please fix the XML issues and call display_diagram again with corrected XML.
 Your failed XML:
 \`\`\`xml
-${xml}
+${finalXml}
 \`\`\``,
                    })
                } else {
@@ -353,6 +434,120 @@ ${currentXml || "No XML available"}
 Please retry with an adjusted search pattern or use display_diagram if retries are exhausted.`,
                    })
                }
            } else if (toolCall.toolName === "append_diagram") {
                const { xml } = toolCall.input as { xml: string }
                console.log(`[append_diagram] === APPEND DEBUG ===`)
                console.log(
                    `[append_diagram] Received fragment length: ${xml.length}`,
                )
                console.log(
                    `[append_diagram] Fragment starts with: ${xml.substring(0, 100)}...`,
                )
                console.log(
                    `[append_diagram] Fragment ends with: ...${xml.substring(xml.length - 100)}`,
                )
                console.log(
                    `[append_diagram] Current partialXmlRef length: ${partialXmlRef.current.length}`,
                )
                // Detect if LLM incorrectly started fresh instead of continuing
                const isFreshStart =
                    xml.trim().startsWith("<mxGraphModel") ||
                    xml.trim().startsWith("<root") ||
                    xml.trim().startsWith('<mxCell id="0"')
                if (isFreshStart) {
                    console.warn(
                        `[append_diagram] LLM started fresh instead of continuing! Rejecting.`,
                    )
                    addToolOutput({
                        tool: "append_diagram",
                        toolCallId: toolCall.toolCallId,
                        state: "output-error",
                        errorText: `ERROR: You started fresh with wrapper tags. Do NOT include <mxGraphModel>, <root>, or <mxCell id="0">.
 Continue from EXACTLY where the partial ended:
 \`\`\`
 ${partialXmlRef.current.slice(-500)}
 \`\`\`
 Start your continuation with the NEXT character after where it stopped.`,
                    })
                    return
                }
                // Append to accumulated XML
                partialXmlRef.current += xml
                console.log(
                    `[append_diagram] After append, total length: ${partialXmlRef.current.length}`,
                )
                // Check if XML is now complete
                const isComplete = partialXmlRef.current.includes("</root>")
                console.log(`[append_diagram] isComplete: ${isComplete}`)
                if (isComplete) {
                    // Wrap and display the complete diagram
                    const finalXml = partialXmlRef.current
                    partialXmlRef.current = "" // Reset
                    isContinuationModeRef.current = false // Continuation complete
                    console.log(
                        `[append_diagram] XML complete! Final length: ${finalXml.length}`,
                    )
                    const fullXml = wrapWithMxFile(finalXml)
                    const validationError = onDisplayChart(fullXml)
                    if (validationError) {
                        console.warn(
                            `[append_diagram] Validation error after assembly:`,
                            validationError,
                        )
                        addToolOutput({
                            tool: "append_diagram",
                            toolCallId: toolCall.toolCallId,
                            state: "output-error",
                            errorText: `Validation error after assembly: ${validationError}
 Assembled XML:
 \`\`\`xml
 ${finalXml.substring(0, 2000)}...
 \`\`\`
 Please use display_diagram with corrected XML.`,
                        })
                    } else {
                        console.log(
                            `[append_diagram] Success! Diagram displayed.`,
                        )
                        addToolOutput({
                            tool: "append_diagram",
                            toolCallId: toolCall.toolCallId,
                            output: "Diagram assembly complete and displayed successfully.",
                        })
                    }
                } else {
                    // Still incomplete - signal to continue (stay in continuation mode)
                    console.log(
                        `[append_diagram] Still incomplete, asking for more.`,
                    )
                    // isContinuationModeRef.current stays true
                    addToolOutput({
                        tool: "append_diagram",
                        toolCallId: toolCall.toolCallId,
                        state: "output-error",
                        errorText: `XML still incomplete (missing </root>). Call append_diagram again to continue.
 Current ending:
 \`\`\`
 ${partialXmlRef.current.slice(-500)}
 \`\`\`
 Continue from EXACTLY where you stopped.`,
                    })
                }
            }
        },
        onError: (error) => {
@@ -398,6 +593,12 @@ Please retry with an adjusted search pattern or use display_diagram if retries a
            const metadata = message?.metadata as
                | Record<string, unknown>
                | undefined
            // Log finish reason for debugging truncation
            console.log(`[onFinish] === FINISH DEBUG ===`)
            console.log(`[onFinish] finishReason: ${metadata?.finishReason}`)
            console.log(`[onFinish] outputTokens: ${metadata?.outputTokens}`)
            if (metadata) {
                // Use Number.isFinite to guard against NaN (typeof NaN === 'number' is true)
                const inputTokens = Number.isFinite(metadata.inputTokens)
@@ -414,65 +615,91 @@ Please retry with an adjusted search pattern or use display_diagram if retries a
            }
        },
        sendAutomaticallyWhen: ({ messages }) => {
            console.log(`[sendAutomaticallyWhen] === RETRY DEBUG ===`)
            console.log(
                `[sendAutomaticallyWhen] isContinuationMode: ${isContinuationModeRef.current}`,
            )
            console.log(
                `[sendAutomaticallyWhen] partialXmlRef.current length: ${partialXmlRef.current.length}`,
            )
            console.log(
                `[sendAutomaticallyWhen] autoRetryCountRef.current: ${autoRetryCountRef.current}`,
            )
            const shouldRetry = hasToolErrors(
                messages as unknown as ChatMessage[],
            )
            console.log(`[sendAutomaticallyWhen] hasToolErrors: ${shouldRetry}`)
            if (!shouldRetry) {
-                // No error, reset retry count
+                // No error, reset retry count and clear state
                console.log(
                    `[sendAutomaticallyWhen] No errors - resetting state`,
                )
                autoRetryCountRef.current = 0
-                if (DEBUG) {
+                partialXmlRef.current = ""
-                    console.log("[sendAutomaticallyWhen] No errors, stopping")
+                isContinuationModeRef.current = false
                }
                return false
            }
-            // Check retry count limit
+            // Continuation mode: unlimited retries (truncation continuation, not real errors)
-            if (autoRetryCountRef.current >= MAX_AUTO_RETRY_COUNT) {
+            // Server limits to 5 steps via stepCountIs(5)
-                if (DEBUG) {
+            if (isContinuationModeRef.current) {
-                    console.log(
+                console.log(
-                        `[sendAutomaticallyWhen] Max retry count (${MAX_AUTO_RETRY_COUNT}) reached, stopping`,
+                    `[sendAutomaticallyWhen] Continuation mode - allowing retry without counting`,
-                    )
+                )
-                }
+                // Don't count against retry limit for continuation
-                toast.error(
+                // Quota checks still apply below
-                    `Auto-retry limit reached (${MAX_AUTO_RETRY_COUNT}). Please try again manually.`,
+            } else {
                // Regular error: check retry count limit
                if (autoRetryCountRef.current >= MAX_AUTO_RETRY_COUNT) {
                    console.log(
                        `[sendAutomaticallyWhen] Max error retry count (${MAX_AUTO_RETRY_COUNT}) reached, stopping`,
                    )
                    toast.error(
                        `Auto-retry limit reached (${MAX_AUTO_RETRY_COUNT}). Please try again manually.`,
                    )
                    autoRetryCountRef.current = 0
                    partialXmlRef.current = ""
                    isContinuationModeRef.current = false
                    return false
                }
                // Increment retry count for actual errors
                autoRetryCountRef.current++
                console.log(
                    `[sendAutomaticallyWhen] Error retry ${autoRetryCountRef.current}/${MAX_AUTO_RETRY_COUNT}`,
                )
                autoRetryCountRef.current = 0
                return false
            }
            // Check quota limits before auto-retry
            const tokenLimitCheck = quotaManager.checkTokenLimit()
            if (!tokenLimitCheck.allowed) {
-                if (DEBUG) {
+                console.log(
-                    console.log(
+                    "[sendAutomaticallyWhen] Token limit exceeded, stopping",
-                        "[sendAutomaticallyWhen] Token limit exceeded, stopping",
+                )
                    )
                }
                quotaManager.showTokenLimitToast(tokenLimitCheck.used)
                autoRetryCountRef.current = 0
                partialXmlRef.current = ""
                isContinuationModeRef.current = false
                return false
            }
            const tpmCheck = quotaManager.checkTPMLimit()
            if (!tpmCheck.allowed) {
-                if (DEBUG) {
+                console.log(
-                    console.log(
+                    "[sendAutomaticallyWhen] TPM limit exceeded, stopping",
-                        "[sendAutomaticallyWhen] TPM limit exceeded, stopping",
+                )
                    )
                }
                quotaManager.showTPMLimitToast()
                autoRetryCountRef.current = 0
                partialXmlRef.current = ""
                isContinuationModeRef.current = false
                return false
            }
-            // Increment retry count and allow retry
+            // Allow retry
-            autoRetryCountRef.current++
+            console.log(
-            if (DEBUG) {
+                `[sendAutomaticallyWhen] Allowing retry${isContinuationModeRef.current ? " [continuation mode]" : ""}`,
-                console.log(
+            )
                    `[sendAutomaticallyWhen] Retrying (${autoRetryCountRef.current}/${MAX_AUTO_RETRY_COUNT})`,
                )
            }
            return true
        },
    })
@@ -817,8 +1044,10 @@ Please retry with an adjusted search pattern or use display_diagram if retries a
        previousXml: string,
        sessionId: string,
    ) => {
-        // Reset auto-retry count on user-initiated message
+        // Reset all retry/continuation state on user-initiated message
        autoRetryCountRef.current = 0
        partialXmlRef.current = ""
        isContinuationModeRef.current = false
        const config = getAIConfig()
--- a/lib/system-prompts.ts
+++ b/lib/system-prompts.ts
@@ -42,11 +42,18 @@ description: Edit specific parts of the EXISTING diagram. Use this when making s
 parameters: {
  edits: Array<{search: string, replace: string}>
 }
 ---Tool3---
 tool name: append_diagram
 description: Continue generating diagram XML when display_diagram was truncated due to output length limits. Only use this after display_diagram truncation.
 parameters: {
  xml: string  // Continuation fragment (NO wrapper tags like <mxGraphModel> or <root>)
 }
 ---End of tools---
 IMPORTANT: Choose the right tool:
 - Use display_diagram for: Creating new diagrams, major restructuring, or when the current diagram XML is empty
 - Use edit_diagram for: Small modifications, adding/removing elements, changing text/colors, repositioning items
 - Use append_diagram for: ONLY when display_diagram was truncated due to output length - continue generating from where you stopped
 Core capabilities:
 - Generate valid, well-formed XML strings for draw.io diagrams
@@ -174,6 +181,18 @@ const EXTENDED_ADDITIONS = `
 </root>
 \`\`\`
 ### append_diagram Details
 **WHEN TO USE:** Only call this tool when display_diagram output was truncated (you'll see an error message about truncation).
 **CRITICAL RULES:**
 1. Do NOT include <mxGraphModel>, <root>, or <mxCell id="0"> - they already exist in the partial
 2. Continue from EXACTLY where your previous output stopped
 3. Generate the remaining XML including closing tags </root></mxGraphModel>
 4. If still truncated, call append_diagram again with the next fragment
 **Example:** If previous output ended with \`<mxCell id="x" style="rounded=1\`, continue with \`;" vertex="1">...\` and complete the remaining elements.
 ### edit_diagram Details
 **CRITICAL RULES:**
--- a/package-lock.json
+++ b/package-lock.json
@@ -40,6 +40,7 @@
                "clsx": "^2.1.1",
                "js-tiktoken": "^1.0.21",
                "jsdom": "^26.0.0",
                "jsonrepair": "^3.13.1",
                "lucide-react": "^0.483.0",
                "motion": "^12.23.25",
                "next": "^16.0.7",
@@ -9199,6 +9200,15 @@
                "node": ">=6"
            }
        },
        "node_modules/jsonrepair": {
            "version": "3.13.1",
            "resolved": "https://registry.npmjs.org/jsonrepair/-/jsonrepair-3.13.1.tgz",
            "integrity": "sha512-WJeiE0jGfxYmtLwBTEk8+y/mYcaleyLXWaqp5bJu0/ZTSeG0KQq/wWQ8pmnkKenEdN6pdnn6QtcoSUkbqDHWNw==",
            "license": "ISC",
            "bin": {
                "jsonrepair": "bin/cli.js"
            }
        },
        "node_modules/jsx-ast-utils": {
            "version": "3.3.5",
            "resolved": "https://registry.npmjs.org/jsx-ast-utils/-/jsx-ast-utils-3.3.5.tgz",
--- a/package.json
+++ b/package.json
@@ -44,6 +44,7 @@
        "clsx": "^2.1.1",
        "js-tiktoken": "^1.0.21",
        "jsdom": "^26.0.0",
        "jsonrepair": "^3.13.1",
        "lucide-react": "^0.483.0",
        "motion": "^12.23.25",
        "next": "^16.0.7",