refactor: simplify LLM XML format to output bare mxCells only (#254)

* refactor: simplify LLM XML format to output bare mxCells only - Update wrapWithMxFile() to always add root cells (id=0, id=1) automatically - LLM now generates only mxCell elements starting from id=2 (no wrapper tags) - Update system prompts and tool descriptions with new format instructions - Update cached responses to remove root cells and wrapper tags - Update truncation detection to check for complete mxCell endings - Update documentation in xml_guide.md * fix: address PR review issues for XML format refactor - Fix critical bug: inconsistent truncation check using old </root> pattern - Fix stale error message referencing </root> tag - Add isMxCellXmlComplete() helper for consistent truncation detection - Improve regex patterns to handle any attribute order in root cells - Update wrapWithMxFile JSDoc to document root cell removal behavior * fix: handle non-self-closing root cells in wrapWithMxFile regex
2026-01-02 22:32:27 +08:00 · 2025-12-14 14:04:44 +09:00
parent 2e24071539
commit 0851b32b67
7 changed files with 143 additions and 144 deletions
--- a/lib/utils.ts
+++ b/lib/utils.ts
@@ -29,6 +29,22 @@ const STRUCTURAL_ATTRS = [
 /** Valid XML entity names */
 const VALID_ENTITIES = new Set(["lt", "gt", "amp", "quot", "apos"])

+// ============================================================================
+// mxCell XML Helpers
+// ============================================================================
+
+/**
+ * Check if mxCell XML output is complete (not truncated).
+ * Complete XML ends with a self-closing tag (/>) or closing mxCell tag.
+ * @param xml - The XML string to check (can be undefined/null)
+ * @returns true if XML appears complete, false if truncated or empty
+ */
+export function isMxCellXmlComplete(xml: string | undefined | null): boolean {
+    const trimmed = xml?.trim() || ""
+    if (!trimmed) return false
+    return trimmed.endsWith("/>") || trimmed.endsWith("</mxCell>")
+}
+
 // ============================================================================
 // XML Parsing Helpers
 // ============================================================================
@@ -197,13 +213,17 @@ export function convertToLegalXml(xmlString: string): string {

 /**
 * Wrap XML content with the full mxfile structure required by draw.io.
- * Handles cases where XML is just <root>, <mxGraphModel>, or already has <mxfile>.
- * @param xml - The XML string (may be partial or complete)
- * @returns Full mxfile-wrapped XML string
+ * Always adds root cells (id="0" and id="1") automatically.
+ * If input already contains root cells, they are removed to avoid duplication.
+ * LLM should only generate mxCell elements starting from id="2".
+ * @param xml - The XML string (bare mxCells, <root>, <mxGraphModel>, or full <mxfile>)
+ * @returns Full mxfile-wrapped XML string with root cells included
 */
 export function wrapWithMxFile(xml: string): string {
-    if (!xml) {
-        return `<mxfile><diagram name="Page-1" id="page-1"><mxGraphModel><root><mxCell id="0"/><mxCell id="1" parent="0"/></root></mxGraphModel></diagram></mxfile>`
+    const ROOT_CELLS = '<mxCell id="0"/><mxCell id="1" parent="0"/>'
+
+    if (!xml || !xml.trim()) {
+        return `<mxfile><diagram name="Page-1" id="page-1"><mxGraphModel><root>${ROOT_CELLS}</root></mxGraphModel></diagram></mxfile>`
    }

    // Already has full structure
@@ -216,9 +236,20 @@ export function wrapWithMxFile(xml: string): string {
        return `<mxfile><diagram name="Page-1" id="page-1">${xml}</diagram></mxfile>`
    }

-    // Just <root> content - extract inner content and wrap fully
-    const rootContent = xml.replace(/<\/?root>/g, "").trim()
-    return `<mxfile><diagram name="Page-1" id="page-1"><mxGraphModel><root>${rootContent}</root></mxGraphModel></diagram></mxfile>`
+    // Has <root> wrapper - extract inner content
+    let content = xml
+    if (xml.includes("<root>")) {
+        content = xml.replace(/<\/?root>/g, "").trim()
+    }
+
+    // Remove any existing root cells from content (LLM shouldn't include them, but handle it gracefully)
+    // Use flexible patterns that match both self-closing (/>) and non-self-closing (></mxCell>) formats
+    content = content
+        .replace(/<mxCell[^>]*\bid=["']0["'][^>]*(?:\/>|><\/mxCell>)/g, "")
+        .replace(/<mxCell[^>]*\bid=["']1["'][^>]*(?:\/>|><\/mxCell>)/g, "")
+        .trim()
+
+    return `<mxfile><diagram name="Page-1" id="page-1"><mxGraphModel><root>${ROOT_CELLS}${content}</root></mxGraphModel></diagram></mxfile>`
 }

 /**