diff --git a/lib/utils.ts b/lib/utils.ts index bff1cdc..1266d10 100644 --- a/lib/utils.ts +++ b/lib/utils.ts @@ -1054,7 +1054,31 @@ export function autoFixXml(xml: string): { fixed: string; fixes: string[] } { fixes.push("Fixed tags to ") } - // 8b. Remove non-draw.io tags (LLM sometimes includes Claude's function calling XML) + // 8b. Fix common closing tag typos (MUST run before foreign tag removal) + const tagTypos = [ + { wrong: /<\/mxElement>/gi, right: "", name: "" }, + { wrong: /<\/mxcell>/g, right: "", name: "" }, // case sensitivity + { + wrong: /<\/mxgeometry>/g, + right: "", + name: "", + }, + { wrong: /<\/mxpoint>/g, right: "", name: "" }, + { + wrong: /<\/mxgraphmodel>/gi, + right: "", + name: "", + }, + ] + for (const { wrong, right, name } of tagTypos) { + const before = fixed + fixed = fixed.replace(wrong, right) + if (fixed !== before) { + fixes.push(`Fixed typo ${name} to ${right}`) + } + } + + // 8c. Remove non-draw.io tags (after typo fixes so lowercase variants are fixed first) // Valid draw.io tags: mxfile, diagram, mxGraphModel, root, mxCell, mxGeometry, mxPoint, Array, Object const validDrawioTags = new Set([ "mxfile", @@ -1079,7 +1103,7 @@ export function autoFixXml(xml: string): { fixed: string; fixes: string[] } { } if (foreignTags.size > 0) { console.log( - "[autoFixXml] Step 8b: Found foreign tags:", + "[autoFixXml] Step 8c: Found foreign tags:", Array.from(foreignTags), ) for (const tag of foreignTags) { @@ -1093,29 +1117,6 @@ export function autoFixXml(xml: string): { fixed: string; fixes: string[] } { ) } - // 9. 
Fix common closing tag typos - const tagTypos = [ - { wrong: /<\/mxElement>/gi, right: "", name: "" }, - { wrong: /<\/mxcell>/g, right: "", name: "" }, // case sensitivity - { - wrong: /<\/mxgeometry>/g, - right: "", - name: "", - }, - { wrong: /<\/mxpoint>/g, right: "", name: "" }, - { - wrong: /<\/mxgraphmodel>/gi, - right: "", - name: "", - }, - ] - for (const { wrong, right, name } of tagTypos) { - if (wrong.test(fixed)) { - fixed = fixed.replace(wrong, right) - fixes.push(`Fixed typo ${name} to ${right}`) - } - } - // 10. Fix unclosed tags by appending missing closing tags // Use parseXmlTags helper to track open tags const tagStack: string[] = [] diff --git a/package-lock.json b/package-lock.json index e5aed1c..9b801fd 100644 --- a/package-lock.json +++ b/package-lock.json @@ -66,7 +66,7 @@ }, "devDependencies": { "@anthropic-ai/tokenizer": "^0.0.4", - "@biomejs/biome": "^2.3.8", + "@biomejs/biome": "^2.3.10", "@tailwindcss/postcss": "^4", "@tailwindcss/typography": "^0.5.19", "@types/negotiator": "^0.6.4", @@ -1420,9 +1420,9 @@ } }, "node_modules/@biomejs/biome": { - "version": "2.3.8", - "resolved": "https://registry.npmjs.org/@biomejs/biome/-/biome-2.3.8.tgz", - "integrity": "sha512-Qjsgoe6FEBxWAUzwFGFrB+1+M8y/y5kwmg5CHac+GSVOdmOIqsAiXM5QMVGZJ1eCUCLlPZtq4aFAQ0eawEUuUA==", + "version": "2.3.10", + "resolved": "https://registry.npmjs.org/@biomejs/biome/-/biome-2.3.10.tgz", + "integrity": "sha512-/uWSUd1MHX2fjqNLHNL6zLYWBbrJeG412/8H7ESuK8ewoRoMPUgHDebqKrPTx/5n6f17Xzqc9hdg3MEqA5hXnQ==", "dev": true, "license": "MIT OR Apache-2.0", "bin": { @@ -1436,20 +1436,20 @@ "url": "https://opencollective.com/biome" }, "optionalDependencies": { - "@biomejs/cli-darwin-arm64": "2.3.8", - "@biomejs/cli-darwin-x64": "2.3.8", - "@biomejs/cli-linux-arm64": "2.3.8", - "@biomejs/cli-linux-arm64-musl": "2.3.8", - "@biomejs/cli-linux-x64": "2.3.8", - "@biomejs/cli-linux-x64-musl": "2.3.8", - "@biomejs/cli-win32-arm64": "2.3.8", - "@biomejs/cli-win32-x64": "2.3.8" + 
"@biomejs/cli-darwin-arm64": "2.3.10", + "@biomejs/cli-darwin-x64": "2.3.10", + "@biomejs/cli-linux-arm64": "2.3.10", + "@biomejs/cli-linux-arm64-musl": "2.3.10", + "@biomejs/cli-linux-x64": "2.3.10", + "@biomejs/cli-linux-x64-musl": "2.3.10", + "@biomejs/cli-win32-arm64": "2.3.10", + "@biomejs/cli-win32-x64": "2.3.10" } }, "node_modules/@biomejs/cli-darwin-arm64": { - "version": "2.3.8", - "resolved": "https://registry.npmjs.org/@biomejs/cli-darwin-arm64/-/cli-darwin-arm64-2.3.8.tgz", - "integrity": "sha512-HM4Zg9CGQ3txTPflxD19n8MFPrmUAjaC7PQdLkugeeC0cQ+PiVrd7i09gaBS/11QKsTDBJhVg85CEIK9f50Qww==", + "version": "2.3.10", + "resolved": "https://registry.npmjs.org/@biomejs/cli-darwin-arm64/-/cli-darwin-arm64-2.3.10.tgz", + "integrity": "sha512-M6xUjtCVnNGFfK7HMNKa593nb7fwNm43fq1Mt71kpLpb+4mE7odO8W/oWVDyBVO4ackhresy1ZYO7OJcVo/B7w==", "cpu": [ "arm64" ], @@ -1464,9 +1464,9 @@ } }, "node_modules/@biomejs/cli-darwin-x64": { - "version": "2.3.8", - "resolved": "https://registry.npmjs.org/@biomejs/cli-darwin-x64/-/cli-darwin-x64-2.3.8.tgz", - "integrity": "sha512-lUDQ03D7y/qEao7RgdjWVGCu+BLYadhKTm40HkpJIi6kn8LSv5PAwRlew/DmwP4YZ9ke9XXoTIQDO1vAnbRZlA==", + "version": "2.3.10", + "resolved": "https://registry.npmjs.org/@biomejs/cli-darwin-x64/-/cli-darwin-x64-2.3.10.tgz", + "integrity": "sha512-Vae7+V6t/Avr8tVbFNjnFSTKZogZHFYl7MMH62P/J1kZtr0tyRQ9Fe0onjqjS2Ek9lmNLmZc/VR5uSekh+p1fg==", "cpu": [ "x64" ], @@ -1481,9 +1481,9 @@ } }, "node_modules/@biomejs/cli-linux-arm64": { - "version": "2.3.8", - "resolved": "https://registry.npmjs.org/@biomejs/cli-linux-arm64/-/cli-linux-arm64-2.3.8.tgz", - "integrity": "sha512-Uo1OJnIkJgSgF+USx970fsM/drtPcQ39I+JO+Fjsaa9ZdCN1oysQmy6oAGbyESlouz+rzEckLTF6DS7cWse95g==", + "version": "2.3.10", + "resolved": "https://registry.npmjs.org/@biomejs/cli-linux-arm64/-/cli-linux-arm64-2.3.10.tgz", + "integrity": "sha512-hhPw2V3/EpHKsileVOFynuWiKRgFEV48cLe0eA+G2wO4SzlwEhLEB9LhlSrVeu2mtSn205W283LkX7Fh48CaxA==", "cpu": [ "arm64" ], @@ -1498,9 +1498,9 @@ } }, 
"node_modules/@biomejs/cli-linux-arm64-musl": { - "version": "2.3.8", - "resolved": "https://registry.npmjs.org/@biomejs/cli-linux-arm64-musl/-/cli-linux-arm64-musl-2.3.8.tgz", - "integrity": "sha512-PShR4mM0sjksUMyxbyPNMxoKFPVF48fU8Qe8Sfx6w6F42verbwRLbz+QiKNiDPRJwUoMG1nPM50OBL3aOnTevA==", + "version": "2.3.10", + "resolved": "https://registry.npmjs.org/@biomejs/cli-linux-arm64-musl/-/cli-linux-arm64-musl-2.3.10.tgz", + "integrity": "sha512-B9DszIHkuKtOH2IFeeVkQmSMVUjss9KtHaNXquYYWCjH8IstNgXgx5B0aSBQNr6mn4RcKKRQZXn9Zu1rM3O0/A==", "cpu": [ "arm64" ], @@ -1515,9 +1515,9 @@ } }, "node_modules/@biomejs/cli-linux-x64": { - "version": "2.3.8", - "resolved": "https://registry.npmjs.org/@biomejs/cli-linux-x64/-/cli-linux-x64-2.3.8.tgz", - "integrity": "sha512-QDPMD5bQz6qOVb3kiBui0zKZXASLo0NIQ9JVJio5RveBEFgDgsvJFUvZIbMbUZT3T00M/1wdzwWXk4GIh0KaAw==", + "version": "2.3.10", + "resolved": "https://registry.npmjs.org/@biomejs/cli-linux-x64/-/cli-linux-x64-2.3.10.tgz", + "integrity": "sha512-wwAkWD1MR95u+J4LkWP74/vGz+tRrIQvr8kfMMJY8KOQ8+HMVleREOcPYsQX82S7uueco60L58Wc6M1I9WA9Dw==", "cpu": [ "x64" ], @@ -1532,9 +1532,9 @@ } }, "node_modules/@biomejs/cli-linux-x64-musl": { - "version": "2.3.8", - "resolved": "https://registry.npmjs.org/@biomejs/cli-linux-x64-musl/-/cli-linux-x64-musl-2.3.8.tgz", - "integrity": "sha512-YGLkqU91r1276uwSjiUD/xaVikdxgV1QpsicT0bIA1TaieM6E5ibMZeSyjQ/izBn4tKQthUSsVZacmoJfa3pDA==", + "version": "2.3.10", + "resolved": "https://registry.npmjs.org/@biomejs/cli-linux-x64-musl/-/cli-linux-x64-musl-2.3.10.tgz", + "integrity": "sha512-QTfHZQh62SDFdYc2nfmZFuTm5yYb4eO1zwfB+90YxUumRCR171tS1GoTX5OD0wrv4UsziMPmrePMtkTnNyYG3g==", "cpu": [ "x64" ], @@ -1549,9 +1549,9 @@ } }, "node_modules/@biomejs/cli-win32-arm64": { - "version": "2.3.8", - "resolved": "https://registry.npmjs.org/@biomejs/cli-win32-arm64/-/cli-win32-arm64-2.3.8.tgz", - "integrity": "sha512-H4IoCHvL1fXKDrTALeTKMiE7GGWFAraDwBYFquE/L/5r1927Te0mYIGseXi4F+lrrwhSWbSGt5qPFswNoBaCxg==", + "version": "2.3.10", 
+ "resolved": "https://registry.npmjs.org/@biomejs/cli-win32-arm64/-/cli-win32-arm64-2.3.10.tgz", + "integrity": "sha512-o7lYc9n+CfRbHvkjPhm8s9FgbKdYZu5HCcGVMItLjz93EhgJ8AM44W+QckDqLA9MKDNFrR8nPbO4b73VC5kGGQ==", "cpu": [ "arm64" ], @@ -1566,9 +1566,9 @@ } }, "node_modules/@biomejs/cli-win32-x64": { - "version": "2.3.8", - "resolved": "https://registry.npmjs.org/@biomejs/cli-win32-x64/-/cli-win32-x64-2.3.8.tgz", - "integrity": "sha512-RguzimPoZWtBapfKhKjcWXBVI91tiSprqdBYu7tWhgN8pKRZhw24rFeNZTNf6UiBfjCYCi9eFQs/JzJZIhuK4w==", + "version": "2.3.10", + "resolved": "https://registry.npmjs.org/@biomejs/cli-win32-x64/-/cli-win32-x64-2.3.10.tgz", + "integrity": "sha512-pHEFgq7dUEsKnqG9mx9bXihxGI49X+ar+UBrEIj3Wqj3UCZp1rNgV+OoyjFgcXsjCWpuEAF4VJdkZr3TrWdCbQ==", "cpu": [ "x64" ], diff --git a/package.json b/package.json index b01db66..8992b24 100644 --- a/package.json +++ b/package.json @@ -76,7 +76,7 @@ }, "devDependencies": { "@anthropic-ai/tokenizer": "^0.0.4", - "@biomejs/biome": "^2.3.8", + "@biomejs/biome": "^2.3.10", "@tailwindcss/postcss": "^4", "@tailwindcss/typography": "^0.5.19", "@types/negotiator": "^0.6.4", diff --git a/packages/mcp-server/package-lock.json b/packages/mcp-server/package-lock.json index 88718b6..a8d57f3 100644 --- a/packages/mcp-server/package-lock.json +++ b/packages/mcp-server/package-lock.json @@ -1,12 +1,12 @@ { "name": "@next-ai-drawio/mcp-server", - "version": "0.1.0", + "version": "0.1.3", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "@next-ai-drawio/mcp-server", - "version": "0.1.0", + "version": "0.1.3", "license": "Apache-2.0", "dependencies": { "@modelcontextprotocol/sdk": "^1.0.4", diff --git a/packages/mcp-server/package.json b/packages/mcp-server/package.json index dc88512..255d883 100644 --- a/packages/mcp-server/package.json +++ b/packages/mcp-server/package.json @@ -1,6 +1,6 @@ { "name": "@next-ai-drawio/mcp-server", - "version": "0.1.2", + "version": "0.1.3", "description": "MCP server for Next AI Draw.io - 
AI-powered diagram generation with real-time browser preview", "type": "module", "main": "dist/index.js", diff --git a/packages/mcp-server/src/index.ts b/packages/mcp-server/src/index.ts index f91ceb4..d0e049a 100644 --- a/packages/mcp-server/src/index.ts +++ b/packages/mcp-server/src/index.ts @@ -41,6 +41,7 @@ import { startHttpServer, } from "./http-server.js" import { log } from "./logger.js" +import { validateAndFixXml } from "./xml-validation.js" // Server configuration const config = { @@ -160,7 +161,7 @@ server.registerTool( .describe("The draw.io XML to display (mxGraphModel format)"), }, }, - async ({ xml }) => { + async ({ xml: inputXml }) => { try { if (!currentSession) { return { @@ -174,6 +175,26 @@ server.registerTool( } } + // Validate and auto-fix XML + let xml = inputXml + const { valid, error, fixed, fixes } = validateAndFixXml(xml) + if (fixed) { + xml = fixed + log.info(`XML auto-fixed: ${fixes.join(", ")}`) + } + if (!valid && error) { + log.error(`XML validation failed: ${error}`) + return { + content: [ + { + type: "text", + text: `Error: XML validation failed - ${error}`, + }, + ], + isError: true, + } + } + log.info(`Displaying diagram, ${xml.length} chars`) // Update session state @@ -274,10 +295,31 @@ server.registerTool( log.info(`Editing diagram with ${operations.length} operation(s)`) + // Validate and auto-fix new_xml for each operation + const validatedOps = operations.map((op) => { + if (op.new_xml) { + const { valid, error, fixed, fixes } = validateAndFixXml( + op.new_xml, + ) + if (fixed) { + log.info( + `Operation ${op.type} ${op.cell_id}: XML auto-fixed: ${fixes.join(", ")}`, + ) + return { ...op, new_xml: fixed } + } + if (!valid && error) { + log.warn( + `Operation ${op.type} ${op.cell_id}: XML validation failed: ${error}`, + ) + } + } + return op + }) + // Apply operations const { result, errors } = applyDiagramOperations( currentSession.xml, - operations as DiagramOperation[], + validatedOps as DiagramOperation[], ) if 
(errors.length > 0) { diff --git a/packages/mcp-server/src/xml-validation.ts b/packages/mcp-server/src/xml-validation.ts new file mode 100644 index 0000000..d6c2daf --- /dev/null +++ b/packages/mcp-server/src/xml-validation.ts @@ -0,0 +1,926 @@ +/** + * XML Validation and Auto-Fix for draw.io diagrams + * Copied from lib/utils.ts to avoid cross-package imports + */ + +// ============================================================================ +// Constants +// ============================================================================ + +/** Maximum XML size to process (1MB) - larger XMLs may cause performance issues */ +const MAX_XML_SIZE = 1_000_000 + +/** Maximum iterations for aggressive cell dropping to prevent infinite loops */ +const MAX_DROP_ITERATIONS = 10 + +/** Structural attributes that should not be duplicated in draw.io */ +const STRUCTURAL_ATTRS = [ + "edge", + "parent", + "source", + "target", + "vertex", + "connectable", +] + +/** Valid XML entity names */ +const VALID_ENTITIES = new Set(["lt", "gt", "amp", "quot", "apos"]) + +// ============================================================================ +// XML Parsing Helpers +// ============================================================================ + +interface ParsedTag { + tag: string + tagName: string + isClosing: boolean + isSelfClosing: boolean + startIndex: number + endIndex: number +} + +/** + * Parse XML tags while properly handling quoted strings + */ +function parseXmlTags(xml: string): ParsedTag[] { + const tags: ParsedTag[] = [] + let i = 0 + + while (i < xml.length) { + const tagStart = xml.indexOf("<", i) + if (tagStart === -1) break + + // Find matching > by tracking quotes + let tagEnd = tagStart + 1 + let inQuote = false + let quoteChar = "" + + while (tagEnd < xml.length) { + const c = xml[tagEnd] + if (inQuote) { + if (c === quoteChar) inQuote = false + } else { + if (c === '"' || c === "'") { + inQuote = true + quoteChar = c + } else if (c === ">") { + break + } + 
} + tagEnd++ + } + + if (tagEnd >= xml.length) break + + const tag = xml.substring(tagStart, tagEnd + 1) + i = tagEnd + 1 + + const tagMatch = /^<(\/?)([a-zA-Z][a-zA-Z0-9:_-]*)/.exec(tag) + if (!tagMatch) continue + + tags.push({ + tag, + tagName: tagMatch[2], + isClosing: tagMatch[1] === "/", + isSelfClosing: tag.endsWith("/>"), + startIndex: tagStart, + endIndex: tagEnd, + }) + } + + return tags +} + +// ============================================================================ +// Validation Helper Functions +// ============================================================================ + +/** Check for duplicate structural attributes in a tag */ +function checkDuplicateAttributes(xml: string): string | null { + const structuralSet = new Set(STRUCTURAL_ATTRS) + const tagPattern = /<[^>]+>/g + let tagMatch + while ((tagMatch = tagPattern.exec(xml)) !== null) { + const tag = tagMatch[0] + const attrPattern = /\s([a-zA-Z_:][a-zA-Z0-9_:.-]*)\s*=/g + const attributes = new Map() + let attrMatch + while ((attrMatch = attrPattern.exec(tag)) !== null) { + const attrName = attrMatch[1] + attributes.set(attrName, (attributes.get(attrName) || 0) + 1) + } + const duplicates = Array.from(attributes.entries()) + .filter(([name, count]) => count > 1 && structuralSet.has(name)) + .map(([name]) => name) + if (duplicates.length > 0) { + return `Invalid XML: Duplicate structural attribute(s): ${duplicates.join(", ")}. 
Remove duplicate attributes.` + } + } + return null +} + +/** Check for duplicate IDs in XML */ +function checkDuplicateIds(xml: string): string | null { + const idPattern = /\bid\s*=\s*["']([^"']+)["']/gi + const ids = new Map() + let idMatch + while ((idMatch = idPattern.exec(xml)) !== null) { + const id = idMatch[1] + ids.set(id, (ids.get(id) || 0) + 1) + } + const duplicateIds = Array.from(ids.entries()) + .filter(([, count]) => count > 1) + .map(([id, count]) => `'${id}' (${count}x)`) + if (duplicateIds.length > 0) { + return `Invalid XML: Found duplicate ID(s): ${duplicateIds.slice(0, 3).join(", ")}. All id attributes must be unique.` + } + return null +} + +/** Check for tag mismatches using parsed tags */ +function checkTagMismatches(xml: string): string | null { + const xmlWithoutComments = xml.replace(//g, "") + const tags = parseXmlTags(xmlWithoutComments) + const tagStack: string[] = [] + + for (const { tagName, isClosing, isSelfClosing } of tags) { + if (isClosing) { + if (tagStack.length === 0) { + return `Invalid XML: Closing tag without matching opening tag` + } + const expected = tagStack.pop() + if (expected?.toLowerCase() !== tagName.toLowerCase()) { + return `Invalid XML: Expected closing tag but found ` + } + } else if (!isSelfClosing) { + tagStack.push(tagName) + } + } + if (tagStack.length > 0) { + return `Invalid XML: Document has ${tagStack.length} unclosed tag(s): ${tagStack.join(", ")}` + } + return null +} + +/** Check for invalid character references */ +function checkCharacterReferences(xml: string): string | null { + const charRefPattern = /&#x?[^;]+;?/g + let charMatch + while ((charMatch = charRefPattern.exec(xml)) !== null) { + const ref = charMatch[0] + if (ref.startsWith("&#x")) { + if (!ref.endsWith(";")) { + return `Invalid XML: Missing semicolon after hex reference: ${ref}` + } + const hexDigits = ref.substring(3, ref.length - 1) + if (hexDigits.length === 0 || !/^[0-9a-fA-F]+$/.test(hexDigits)) { + return `Invalid XML: 
Invalid hex character reference: ${ref}` + } + } else if (ref.startsWith("&#")) { + if (!ref.endsWith(";")) { + return `Invalid XML: Missing semicolon after decimal reference: ${ref}` + } + const decDigits = ref.substring(2, ref.length - 1) + if (decDigits.length === 0 || !/^[0-9]+$/.test(decDigits)) { + return `Invalid XML: Invalid decimal character reference: ${ref}` + } + } + } + return null +} + +/** Check for invalid entity references */ +function checkEntityReferences(xml: string): string | null { + const xmlWithoutComments = xml.replace(//g, "") + const bareAmpPattern = /&(?!(?:lt|gt|amp|quot|apos|#))/g + if (bareAmpPattern.test(xmlWithoutComments)) { + return "Invalid XML: Found unescaped & character(s). Replace & with &" + } + const invalidEntityPattern = /&([a-zA-Z][a-zA-Z0-9]*);/g + let entityMatch + while ( + (entityMatch = invalidEntityPattern.exec(xmlWithoutComments)) !== null + ) { + if (!VALID_ENTITIES.has(entityMatch[1])) { + return `Invalid XML: Invalid entity reference: &${entityMatch[1]}; - use only valid XML entities (lt, gt, amp, quot, apos)` + } + } + return null +} + +/** Check for nested mxCell tags using regex */ +function checkNestedMxCells(xml: string): string | null { + const cellTagPattern = /<\/?mxCell[^>]*>/g + const cellStack: number[] = [] + let cellMatch + while ((cellMatch = cellTagPattern.exec(xml)) !== null) { + const tag = cellMatch[0] + if (tag.startsWith("")) { + if (cellStack.length > 0) cellStack.pop() + } else if (!tag.endsWith("/>")) { + const isLabelOrGeometry = + /\sas\s*=\s*["'](valueLabel|geometry)["']/.test(tag) + if (!isLabelOrGeometry) { + cellStack.push(cellMatch.index) + if (cellStack.length > 1) { + return "Invalid XML: Found nested mxCell tags. Cells should be siblings, not nested inside other mxCell elements." 
+ } + } + } + } + return null +} + +// ============================================================================ +// Main Validation Function +// ============================================================================ + +/** + * Validates draw.io XML structure for common issues + * Uses DOM parsing + additional regex checks for high accuracy + * @param xml - The XML string to validate + * @returns null if valid, error message string if invalid + */ +export function validateMxCellStructure(xml: string): string | null { + // Size check for performance + if (xml.length > MAX_XML_SIZE) { + console.warn( + `[validateMxCellStructure] XML size (${xml.length}) exceeds ${MAX_XML_SIZE} bytes, may cause performance issues`, + ) + } + + // 0. First use DOM parser to catch syntax errors (most accurate) + try { + const parser = new DOMParser() + const doc = parser.parseFromString(xml, "text/xml") + const parseError = doc.querySelector("parsererror") + if (parseError) { + return `Invalid XML: The XML contains syntax errors (likely unescaped special characters like <, >, & in attribute values). Please escape special characters: use < for <, > for >, & for &, " for ". Regenerate the diagram with properly escaped values.` + } + + // DOM-based checks for nested mxCell + const allCells = doc.querySelectorAll("mxCell") + for (const cell of allCells) { + if (cell.parentElement?.tagName === "mxCell") { + const id = cell.getAttribute("id") || "unknown" + return `Invalid XML: Found nested mxCell (id="${id}"). Cells should be siblings, not nested inside other mxCell elements.` + } + } + } catch (error) { + console.warn( + "[validateMxCellStructure] DOMParser threw unexpected error, falling back to regex validation:", + error, + ) + } + + // 1. Check for CDATA wrapper (invalid at document root) + if (/^\s* from end" + } + + // 2. Check for duplicate structural attributes + const dupAttrError = checkDuplicateAttributes(xml) + if (dupAttrError) { + return dupAttrError + } + + // 3. 
Check for unescaped < in attribute values + const attrValuePattern = /=\s*"([^"]*)"/g + let attrValMatch + while ((attrValMatch = attrValuePattern.exec(xml)) !== null) { + const value = attrValMatch[1] + if (//g + let commentMatch + while ((commentMatch = commentPattern.exec(xml)) !== null) { + if (/--/.test(commentMatch[1])) { + return "Invalid XML: Comment contains -- (double hyphen) which is not allowed" + } + } + + // 8. Check for unescaped entity references and invalid entity names + const entityError = checkEntityReferences(xml) + if (entityError) { + return entityError + } + + // 9. Check for empty id attributes on mxCell + if (/]*\sid\s*=\s*["']\s*["'][^>]*>/g.test(xml)) { + return "Invalid XML: Found mxCell element(s) with empty id attribute" + } + + // 10. Check for nested mxCell tags + const nestedCellError = checkNestedMxCells(xml) + if (nestedCellError) { + return nestedCellError + } + + return null +} + +// ============================================================================ +// Auto-Fix Function +// ============================================================================ + +/** + * Attempts to auto-fix common XML issues in draw.io diagrams + * @param xml - The XML string to fix + * @returns Object with fixed XML and list of fixes applied + */ +export function autoFixXml(xml: string): { fixed: string; fixes: string[] } { + let fixed = xml + const fixes: string[] = [] + + // 0. Fix JSON-escaped XML + if (/=\\"/.test(fixed)) { + fixed = fixed.replace(/\\"/g, '"') + fixed = fixed.replace(/\\n/g, "\n") + fixes.push("Fixed JSON-escaped XML") + } + + // 1. Remove CDATA wrapper + if (/^\s*\s*$/, "") + fixes.push("Removed CDATA wrapper") + } + + // 2. Remove text before XML declaration or root element + const xmlStart = fixed.search(/<(\?xml|mxGraphModel|mxfile)/i) + if (xmlStart > 0 && !/^<[a-zA-Z]/.test(fixed.trim())) { + fixed = fixed.substring(xmlStart) + fixes.push("Removed text before XML root") + } + + // 3. 
Fix duplicate attributes + let dupAttrFixed = false + fixed = fixed.replace(/<[^>]+>/g, (tag) => { + let newTag = tag + for (const attr of STRUCTURAL_ATTRS) { + const attrRegex = new RegExp( + `\\s${attr}\\s*=\\s*["'][^"']*["']`, + "gi", + ) + const matches = tag.match(attrRegex) + if (matches && matches.length > 1) { + let firstKept = false + newTag = newTag.replace(attrRegex, (m) => { + if (!firstKept) { + firstKept = true + return m + } + dupAttrFixed = true + return "" + }) + } + } + return newTag + }) + if (dupAttrFixed) { + fixes.push("Removed duplicate structural attributes") + } + + // 4. Fix unescaped & characters + const ampersandPattern = + /&(?!(?:lt|gt|amp|quot|apos|#[0-9]+|#x[0-9a-fA-F]+);)/g + if (ampersandPattern.test(fixed)) { + fixed = fixed.replace( + /&(?!(?:lt|gt|amp|quot|apos|#[0-9]+|#x[0-9a-fA-F]+);)/g, + "&", + ) + fixes.push("Escaped unescaped & characters") + } + + // 5. Fix invalid entity names (double-escaping) + const invalidEntities = [ + { pattern: /&quot;/g, replacement: """, name: "&quot;" }, + { pattern: /&lt;/g, replacement: "<", name: "&lt;" }, + { pattern: /&gt;/g, replacement: ">", name: "&gt;" }, + { pattern: /&apos;/g, replacement: "'", name: "&apos;" }, + { pattern: /&amp;/g, replacement: "&", name: "&amp;" }, + ] + for (const { pattern, replacement, name } of invalidEntities) { + if (pattern.test(fixed)) { + fixed = fixed.replace(pattern, replacement) + fixes.push(`Fixed double-escaped entity ${name}`) + } + } + + // 6. Fix malformed attribute quotes + const malformedQuotePattern = /(\s[a-zA-Z][a-zA-Z0-9_:-]*)="/ + if (malformedQuotePattern.test(fixed)) { + fixed = fixed.replace( + /(\s[a-zA-Z][a-zA-Z0-9_:-]*)="([^&]*?)"/g, + '$1="$2"', + ) + fixes.push("Fixed malformed attribute quotes") + } + + // 7. 
Fix malformed closing tags + const malformedClosingTag = /<\/([a-zA-Z][a-zA-Z0-9]*)\s*\/>/g + if (malformedClosingTag.test(fixed)) { + fixed = fixed.replace(/<\/([a-zA-Z][a-zA-Z0-9]*)\s*\/>/g, "") + fixes.push("Fixed malformed closing tags") + } + + // 8. Fix missing space between attributes + const missingSpacePattern = /("[^"]*")([a-zA-Z][a-zA-Z0-9_:-]*=)/g + if (missingSpacePattern.test(fixed)) { + fixed = fixed.replace(/("[^"]*")([a-zA-Z][a-zA-Z0-9_:-]*=)/g, "$1 $2") + fixes.push("Added missing space between attributes") + } + + // 9. Fix unescaped quotes in style color values + const quotedColorPattern = /;([a-zA-Z]*[Cc]olor)="#/ + if (quotedColorPattern.test(fixed)) { + fixed = fixed.replace(/;([a-zA-Z]*[Cc]olor)="#/g, ";$1=#") + fixes.push("Removed quotes around color values in style") + } + + // 10. Fix unescaped < in attribute values + const attrPattern = /(=\s*")([^"]*?)(<)([^"]*?)(")/g + let attrMatch + let hasUnescapedLt = false + while ((attrMatch = attrPattern.exec(fixed)) !== null) { + if (!attrMatch[3].startsWith("<")) { + hasUnescapedLt = true + break + } + } + if (hasUnescapedLt) { + fixed = fixed.replace(/=\s*"([^"]*)"/g, (_match, value) => { + const escaped = value.replace(/ { + if (/^[0-9a-fA-F]+$/.test(hex) && hex.length > 0) { + return match + } + invalidHexRefs.push(match) + return "" + }) + if (invalidHexRefs.length > 0) { + fixes.push( + `Removed ${invalidHexRefs.length} invalid hex character reference(s)`, + ) + } + + // 12. Fix invalid decimal character references + const invalidDecRefs: string[] = [] + fixed = fixed.replace(/&#([^x][^;]*);/g, (match, dec) => { + if (/^[0-9]+$/.test(dec) && dec.length > 0) { + return match + } + invalidDecRefs.push(match) + return "" + }) + if (invalidDecRefs.length > 0) { + fixes.push( + `Removed ${invalidDecRefs.length} invalid decimal character reference(s)`, + ) + } + + // 13. 
Fix invalid comment syntax + fixed = fixed.replace(//g, (match, content) => { + if (/--/.test(content)) { + let fixedContent = content + while (/--/.test(fixedContent)) { + fixedContent = fixedContent.replace(/--/g, "-") + } + fixes.push("Fixed invalid comment syntax") + return `` + } + return match + }) + + // 14. Fix tags to + const hasCellTags = /<\/?Cell[\s>]/i.test(fixed) + if (hasCellTags) { + fixed = fixed.replace(//gi, "") + fixed = fixed.replace(/<\/Cell>/gi, "") + fixes.push("Fixed tags to ") + } + + // 15. Fix common closing tag typos (MUST run before foreign tag removal) + const tagTypos = [ + { wrong: /<\/mxElement>/gi, right: "", name: "" }, + { wrong: /<\/mxcell>/g, right: "", name: "" }, + { + wrong: /<\/mxgeometry>/g, + right: "", + name: "", + }, + { wrong: /<\/mxpoint>/g, right: "", name: "" }, + { + wrong: /<\/mxgraphmodel>/gi, + right: "", + name: "", + }, + ] + for (const { wrong, right, name } of tagTypos) { + const before = fixed + fixed = fixed.replace(wrong, right) + if (fixed !== before) { + fixes.push(`Fixed typo ${name} to ${right}`) + } + } + + // 16. Remove non-draw.io tags (after typo fixes so lowercase variants are fixed first) + const validDrawioTags = new Set([ + "mxfile", + "diagram", + "mxGraphModel", + "root", + "mxCell", + "mxGeometry", + "mxPoint", + "Array", + "Object", + "mxRectangle", + ]) + const foreignTagPattern = /<\/?([a-zA-Z][a-zA-Z0-9_]*)[^>]*>/g + let foreignMatch + const foreignTags = new Set() + while ((foreignMatch = foreignTagPattern.exec(fixed)) !== null) { + const tagName = foreignMatch[1] + if (!validDrawioTags.has(tagName)) { + foreignTags.add(tagName) + } + } + if (foreignTags.size > 0) { + for (const tag of foreignTags) { + fixed = fixed.replace(new RegExp(`<${tag}[^>]*>`, "gi"), "") + fixed = fixed.replace(new RegExp(``, "gi"), "") + } + fixes.push( + `Removed foreign tags: ${Array.from(foreignTags).join(", ")}`, + ) + } + + // 17. 
Fix unclosed tags + const tagStack: string[] = [] + const parsedTags = parseXmlTags(fixed) + + for (const { tagName, isClosing, isSelfClosing } of parsedTags) { + if (isClosing) { + const lastIdx = tagStack.lastIndexOf(tagName) + if (lastIdx !== -1) { + tagStack.splice(lastIdx, 1) + } + } else if (!isSelfClosing) { + tagStack.push(tagName) + } + } + + if (tagStack.length > 0) { + const tagsToClose: string[] = [] + for (const tagName of tagStack.reverse()) { + const openCount = ( + fixed.match(new RegExp(`<${tagName}[\\s>]`, "gi")) || [] + ).length + const closeCount = ( + fixed.match(new RegExp(``, "gi")) || [] + ).length + if (openCount > closeCount) { + tagsToClose.push(tagName) + } + } + if (tagsToClose.length > 0) { + const closingTags = tagsToClose.map((t) => ``).join("\n") + fixed = fixed.trimEnd() + "\n" + closingTags + fixes.push( + `Closed ${tagsToClose.length} unclosed tag(s): ${tagsToClose.join(", ")}`, + ) + } + } + + // 18. Remove extra closing tags + const tagCounts = new Map< + string, + { opens: number; closes: number; selfClosing: number } + >() + const fullTagPattern = /<(\/?[a-zA-Z][a-zA-Z0-9]*)[^>]*>/g + let tagCountMatch + while ((tagCountMatch = fullTagPattern.exec(fixed)) !== null) { + const fullMatch = tagCountMatch[0] + const tagPart = tagCountMatch[1] + const isClosing = tagPart.startsWith("/") + const isSelfClosing = fullMatch.endsWith("/>") + const tagName = isClosing ? 
tagPart.slice(1) : tagPart + + let counts = tagCounts.get(tagName) + if (!counts) { + counts = { opens: 0, closes: 0, selfClosing: 0 } + tagCounts.set(tagName, counts) + } + if (isClosing) { + counts.closes++ + } else if (isSelfClosing) { + counts.selfClosing++ + } else { + counts.opens++ + } + } + + for (const [tagName, counts] of tagCounts) { + const extraCloses = counts.closes - counts.opens + if (extraCloses > 0) { + let removed = 0 + const closeTagPattern = new RegExp(``, "g") + const matches = [...fixed.matchAll(closeTagPattern)] + for ( + let i = matches.length - 1; + i >= 0 && removed < extraCloses; + i-- + ) { + const match = matches[i] + const idx = match.index ?? 0 + fixed = fixed.slice(0, idx) + fixed.slice(idx + match[0].length) + removed++ + } + if (removed > 0) { + fixes.push( + `Removed ${removed} extra closing tag(s)`, + ) + } + } + } + + // 19. Remove trailing garbage after last XML tag + const closingTagPattern = /<\/[a-zA-Z][a-zA-Z0-9]*>|\/>/g + let lastValidTagEnd = -1 + let closingMatch + while ((closingMatch = closingTagPattern.exec(fixed)) !== null) { + lastValidTagEnd = closingMatch.index + closingMatch[0].length + } + if (lastValidTagEnd > 0 && lastValidTagEnd < fixed.length) { + const trailing = fixed.slice(lastValidTagEnd).trim() + if (trailing) { + fixed = fixed.slice(0, lastValidTagEnd) + fixes.push("Removed trailing garbage after last XML tag") + } + } + + // 20. 
Fix nested mxCell by flattening + const lines = fixed.split("\n") + let newLines: string[] = [] + let nestedFixed = 0 + let extraClosingToRemove = 0 + + for (let i = 0; i < lines.length; i++) { + const line = lines[i] + const nextLine = lines[i + 1] + + if ( + nextLine && + /") && + !nextLine.includes("/>") + ) { + const id1 = line.match(/\bid\s*=\s*["']([^"']+)["']/)?.[1] + const id2 = nextLine.match(/\bid\s*=\s*["']([^"']+)["']/)?.[1] + + if (id1 && id1 === id2) { + nestedFixed++ + extraClosingToRemove++ + continue + } + } + + if (extraClosingToRemove > 0 && /^\s*<\/mxCell>\s*$/.test(line)) { + extraClosingToRemove-- + continue + } + + newLines.push(line) + } + + if (nestedFixed > 0) { + fixed = newLines.join("\n") + fixes.push(`Flattened ${nestedFixed} duplicate-ID nested mxCell(s)`) + } + + // 21. Fix true nested mxCell (different IDs) + const lines2 = fixed.split("\n") + newLines = [] + let trueNestedFixed = 0 + let cellDepth = 0 + let pendingCloseRemoval = 0 + + for (let i = 0; i < lines2.length; i++) { + const line = lines2[i] + const trimmed = line.trim() + + const isOpenCell = /") + const isCloseCell = trimmed === "" + + if (isOpenCell) { + if (cellDepth > 0) { + const indent = line.match(/^(\s*)/)?.[1] || "" + newLines.push(indent + "") + trueNestedFixed++ + pendingCloseRemoval++ + } + cellDepth = 1 + newLines.push(line) + } else if (isCloseCell) { + if (pendingCloseRemoval > 0) { + pendingCloseRemoval-- + } else { + cellDepth = Math.max(0, cellDepth - 1) + newLines.push(line) + } + } else { + newLines.push(line) + } + } + + if (trueNestedFixed > 0) { + fixed = newLines.join("\n") + fixes.push(`Fixed ${trueNestedFixed} true nested mxCell(s)`) + } + + // 22. 
Fix duplicate IDs by appending suffix + const seenIds = new Map() + const duplicateIds: string[] = [] + + const idPattern = /\bid\s*=\s*["']([^"']+)["']/gi + let idMatch + while ((idMatch = idPattern.exec(fixed)) !== null) { + const id = idMatch[1] + seenIds.set(id, (seenIds.get(id) || 0) + 1) + } + + for (const [id, count] of seenIds) { + if (count > 1) duplicateIds.push(id) + } + + if (duplicateIds.length > 0) { + const idCounters = new Map() + fixed = fixed.replace(/\bid\s*=\s*["']([^"']+)["']/gi, (match, id) => { + if (!duplicateIds.includes(id)) return match + + const count = idCounters.get(id) || 0 + idCounters.set(id, count + 1) + + if (count === 0) return match + + const newId = `${id}_dup${count}` + return match.replace(id, newId) + }) + fixes.push(`Renamed ${duplicateIds.length} duplicate ID(s)`) + } + + // 23. Fix empty id attributes + let emptyIdCount = 0 + fixed = fixed.replace( + /]*)\sid\s*=\s*["']\s*["']([^>]*)>/g, + (_match, before, after) => { + emptyIdCount++ + const newId = `cell_${Date.now()}_${emptyIdCount}` + return `` + }, + ) + if (emptyIdCount > 0) { + fixes.push(`Generated ${emptyIdCount} missing ID(s)`) + } + + // 24. 
// ============================================================================
// Combined Validation and Fix
// ============================================================================

/**
 * Validate an XML string and, if it fails, run the auto-fixer and validate
 * the repaired result.
 *
 * @param xml - The XML string to validate and potentially fix
 * @returns An object where `valid`/`error` describe the final validation
 *   outcome, `fixed` is the repaired XML (null when the input was already
 *   valid, or when the result is still invalid and no fixes were applied),
 *   and `fixes` lists the fixes reported by the auto-fixer.
 */
export function validateAndFixXml(xml: string): {
    valid: boolean
    error: string | null
    fixed: string | null
    fixes: string[]
} {
    // Fast path: the input already passes validation, nothing to repair.
    const initialError = validateMxCellStructure(xml)
    if (!initialError) {
        return { valid: true, error: null, fixed: null, fixes: [] }
    }

    // Repair, then re-run the same validation on the repaired text.
    const repair = autoFixXml(xml)
    const remainingError = validateMxCellStructure(repair.fixed)

    if (remainingError) {
        // Still invalid: only hand back the repaired text if the fixer
        // actually changed something.
        return {
            valid: false,
            error: remainingError,
            fixed: repair.fixes.length === 0 ? null : repair.fixed,
            fixes: repair.fixes,
        }
    }

    return {
        valid: true,
        error: null,
        fixed: repair.fixed,
        fixes: repair.fixes,
    }
}
/**
 * Check if mxCell XML output is complete (not truncated).
 *
 * Trailing tool-call wrapper closing tags (mxParameter, invoke, and their
 * antml-prefixed variants) are stripped first, repeatedly, so that any
 * stacking order of wrappers is handled. What remains must end with either a
 * self-closing tag (`/>`) or an explicit mxCell closing tag.
 *
 * @param xml - The XML string to check (can be undefined/null)
 * @returns true if XML appears complete, false if truncated or empty
 */
export function isMxCellXmlComplete(xml: string | undefined | null): boolean {
    let trimmed = xml?.trim() ?? ""
    if (!trimmed) return false

    // Strip wrapper tags if present. Loop until a pass changes nothing,
    // because each pass removes at most one occurrence of each wrapper.
    let prev = ""
    while (prev !== trimmed) {
        prev = trimmed
        trimmed = trimmed
            .replace(/<\/mxParameter>\s*$/i, "")
            .replace(/<\/invoke>\s*$/i, "")
            .replace(/<\/antml:parameter>\s*$/i, "")
            .replace(/<\/antml:invoke>\s*$/i, "")
            .trim()
    }

    // The closing-tag literal must be non-empty: endsWith("") is true for
    // every string and would make truncated output look complete.
    return trimmed.endsWith("/>") || trimmed.endsWith("</mxCell>")
}