refactor: simplify LLM XML format to output bare mxCells only (#254)

* refactor: simplify LLM XML format to output bare mxCells only

- Update wrapWithMxFile() to always add root cells (id=0, id=1) automatically
- LLM now generates only mxCell elements starting from id=2 (no wrapper tags)
- Update system prompts and tool descriptions with new format instructions
- Update cached responses to remove root cells and wrapper tags
- Update truncation detection to check for complete mxCell endings
- Update documentation in xml_guide.md

* fix: address PR review issues for XML format refactor

- Fix critical bug: inconsistent truncation check using old </root> pattern
- Fix stale error message referencing </root> tag
- Add isMxCellXmlComplete() helper for consistent truncation detection
- Improve regex patterns to handle any attribute order in root cells
- Update wrapWithMxFile JSDoc to document root cell removal behavior

* fix: handle non-self-closing root cells in wrapWithMxFile regex
This commit is contained in:
Dayuan Jiang
2025-12-14 14:04:44 +09:00
committed by GitHub
parent 2e24071539
commit 0851b32b67
7 changed files with 143 additions and 144 deletions

View File

@@ -29,6 +29,22 @@ const STRUCTURAL_ATTRS = [
/** Valid XML entity names */
const VALID_ENTITIES = new Set(["lt", "gt", "amp", "quot", "apos"])
// ============================================================================
// mxCell XML Helpers
// ============================================================================
/**
* Check if mxCell XML output is complete (not truncated).
* Complete XML ends with a self-closing tag (/>) or closing mxCell tag.
* @param xml - The XML string to check (can be undefined/null)
* @returns true if XML appears complete, false if truncated or empty
*/
export function isMxCellXmlComplete(xml: string | undefined | null): boolean {
const trimmed = xml?.trim() || ""
if (!trimmed) return false
return trimmed.endsWith("/>") || trimmed.endsWith("</mxCell>")
}
// ============================================================================
// XML Parsing Helpers
// ============================================================================
@@ -197,13 +213,17 @@ export function convertToLegalXml(xmlString: string): string {
/**
* Wrap XML content with the full mxfile structure required by draw.io.
* Handles cases where XML is just <root>, <mxGraphModel>, or already has <mxfile>.
* @param xml - The XML string (may be partial or complete)
* @returns Full mxfile-wrapped XML string
* Always adds root cells (id="0" and id="1") automatically.
* If input already contains root cells, they are removed to avoid duplication.
* LLM should only generate mxCell elements starting from id="2".
* @param xml - The XML string (bare mxCells, <root>, <mxGraphModel>, or full <mxfile>)
* @returns Full mxfile-wrapped XML string with root cells included
*/
export function wrapWithMxFile(xml: string): string {
if (!xml) {
return `<mxfile><diagram name="Page-1" id="page-1"><mxGraphModel><root><mxCell id="0"/><mxCell id="1" parent="0"/></root></mxGraphModel></diagram></mxfile>`
const ROOT_CELLS = '<mxCell id="0"/><mxCell id="1" parent="0"/>'
if (!xml || !xml.trim()) {
return `<mxfile><diagram name="Page-1" id="page-1"><mxGraphModel><root>${ROOT_CELLS}</root></mxGraphModel></diagram></mxfile>`
}
// Already has full structure
@@ -216,9 +236,20 @@ export function wrapWithMxFile(xml: string): string {
return `<mxfile><diagram name="Page-1" id="page-1">${xml}</diagram></mxfile>`
}
// Just <root> content - extract inner content and wrap fully
const rootContent = xml.replace(/<\/?root>/g, "").trim()
return `<mxfile><diagram name="Page-1" id="page-1"><mxGraphModel><root>${rootContent}</root></mxGraphModel></diagram></mxfile>`
// Has <root> wrapper - extract inner content
let content = xml
if (xml.includes("<root>")) {
content = xml.replace(/<\/?root>/g, "").trim()
}
// Remove any existing root cells from content (LLM shouldn't include them, but handle it gracefully)
// Use flexible patterns that match both self-closing (/>) and non-self-closing (></mxCell>) formats
content = content
.replace(/<mxCell[^>]*\bid=["']0["'][^>]*(?:\/>|><\/mxCell>)/g, "")
.replace(/<mxCell[^>]*\bid=["']1["'][^>]*(?:\/>|><\/mxCell>)/g, "")
.trim()
return `<mxfile><diagram name="Page-1" id="page-1"><mxGraphModel><root>${ROOT_CELLS}${content}</root></mxGraphModel></diagram></mxfile>`
}
/**