diff --git a/components/chat-message-display.tsx b/components/chat-message-display.tsx
index 5e00e6d..1429c31 100644
--- a/components/chat-message-display.tsx
+++ b/components/chat-message-display.tsx
@@ -29,11 +29,7 @@ import {
ReasoningTrigger,
} from "@/components/ai-elements/reasoning"
import { ScrollArea } from "@/components/ui/scroll-area"
-import {
- convertToLegalXml,
- replaceNodes,
- validateMxCellStructure,
-} from "@/lib/utils"
+import { convertToLegalXml, replaceNodes, validateAndFixXml } from "@/lib/utils"
import ExamplePanel from "./chat-example-panel"
import { CodeBlock } from "./code-block"
@@ -312,15 +308,24 @@ export function ChatMessageDisplay({
``
const replacedXML = replaceNodes(baseXML, convertedXml)
- const validationError = validateMxCellStructure(replacedXML)
- if (!validationError) {
+ // Validate and auto-fix the XML
+ const validation = validateAndFixXml(replacedXML)
+ if (validation.valid) {
previousXML.current = convertedXml
+ // Use fixed XML if available, otherwise use original
+ const xmlToLoad = validation.fixed || replacedXML
+ if (validation.fixes.length > 0) {
+ console.log(
+ "[ChatMessageDisplay] Auto-fixed XML issues:",
+ validation.fixes,
+ )
+ }
// Skip validation in loadDiagram since we already validated above
- onDisplayChart(replacedXML, true)
+ onDisplayChart(xmlToLoad, true)
} else {
console.error(
"[ChatMessageDisplay] XML validation failed:",
- validationError,
+ validation.error,
)
// Only show toast if this is the final XML (not during streaming)
if (showToast) {
diff --git a/contexts/diagram-context.tsx b/contexts/diagram-context.tsx
index 5a2477f..e76d587 100644
--- a/contexts/diagram-context.tsx
+++ b/contexts/diagram-context.tsx
@@ -5,7 +5,7 @@ import { createContext, useContext, useRef, useState } from "react"
import type { DrawIoEmbedRef } from "react-drawio"
import { STORAGE_DIAGRAM_XML_KEY } from "@/components/chat-panel"
import type { ExportFormat } from "@/components/save-dialog"
-import { extractDiagramXML, validateMxCellStructure } from "../lib/utils"
+import { extractDiagramXML, validateAndFixXml } from "../lib/utils"
interface DiagramContextType {
chartXML: string
@@ -86,21 +86,34 @@ export function DiagramProvider({ children }: { children: React.ReactNode }) {
chart: string,
skipValidation?: boolean,
): string | null => {
+ let xmlToLoad = chart
+
// Validate XML structure before loading (unless skipped for internal use)
if (!skipValidation) {
- const validationError = validateMxCellStructure(chart)
- if (validationError) {
- console.warn("[loadDiagram] Validation error:", validationError)
- return validationError
+ const validation = validateAndFixXml(chart)
+ if (!validation.valid) {
+ console.warn(
+ "[loadDiagram] Validation error:",
+ validation.error,
+ )
+ return validation.error
+ }
+ // Use fixed XML if auto-fix was applied
+ if (validation.fixed) {
+ console.log(
+ "[loadDiagram] Auto-fixed XML issues:",
+ validation.fixes,
+ )
+ xmlToLoad = validation.fixed
}
}
// Keep chartXML in sync even when diagrams are injected (e.g., display_diagram tool)
- setChartXML(chart)
+ setChartXML(xmlToLoad)
if (drawioRef.current) {
drawioRef.current.load({
- xml: chart,
+ xml: xmlToLoad,
})
}
diff --git a/lib/utils.ts b/lib/utils.ts
index 8c4d4eb..34a15a1 100644
--- a/lib/utils.ts
+++ b/lib/utils.ts
@@ -535,141 +535,677 @@ export function replaceXMLParts(
/**
* Validates draw.io XML structure for common issues
+ * Uses DOM parsing + additional regex checks for high accuracy
* @param xml - The XML string to validate
* @returns null if valid, error message string if invalid
*/
export function validateMxCellStructure(xml: string): string | null {
- const parser = new DOMParser()
- const doc = parser.parseFromString(xml, "text/xml")
+ // 0. First use DOM parser to catch syntax errors (most accurate)
+ try {
+ const parser = new DOMParser()
+ const doc = parser.parseFromString(xml, "text/xml")
+ const parseError = doc.querySelector("parsererror")
+ if (parseError) {
+ return `Invalid XML: The XML contains syntax errors (likely unescaped special characters like <, >, & in attribute values). Please escape special characters: use < for <, > for >, & for &, " for ". Regenerate the diagram with properly escaped values.`
+ }
- // Check for XML parsing errors (includes unescaped special characters)
- const parseError = doc.querySelector("parsererror")
- if (parseError) {
- return `Invalid XML: The XML contains syntax errors (likely unescaped special characters like <, >, & in attribute values). Please escape special characters: use < for <, > for >, & for &, " for ". Regenerate the diagram with properly escaped values.`
- }
-
- // Get all mxCell elements once for all validations
- const allCells = doc.querySelectorAll("mxCell")
-
- // Single pass: collect IDs, check for duplicates, nesting, orphans, and invalid parents
- const cellIds = new Set()
- const duplicateIds: string[] = []
- const nestedCells: string[] = []
- const orphanCells: string[] = []
- const invalidParents: { id: string; parent: string }[] = []
- const edgesToValidate: {
- id: string
- source: string | null
- target: string | null
- }[] = []
-
- allCells.forEach((cell) => {
- const id = cell.getAttribute("id")
- const parent = cell.getAttribute("parent")
- const isEdge = cell.getAttribute("edge") === "1"
-
- // Check for duplicate IDs
- if (id) {
- if (cellIds.has(id)) {
- duplicateIds.push(id)
- } else {
- cellIds.add(id)
+ // DOM-based checks for nested mxCell
+ const allCells = doc.querySelectorAll("mxCell")
+ for (const cell of allCells) {
+ if (cell.parentElement?.tagName === "mxCell") {
+ const id = cell.getAttribute("id") || "unknown"
+ return `Invalid XML: Found nested mxCell (id="${id}"). Cells should be siblings, not nested inside other mxCell elements.`
}
}
-
- // Check for nested mxCell (parent element is also mxCell)
- if (cell.parentElement?.tagName === "mxCell") {
- nestedCells.push(id || "unknown")
- }
-
- // Check parent attribute (skip root cell id="0")
- if (id !== "0") {
- if (!parent) {
- if (id) orphanCells.push(id)
- } else {
- // Store for later validation (after all IDs collected)
- invalidParents.push({ id: id || "unknown", parent })
- }
- }
-
- // Collect edges for connection validation
- if (isEdge) {
- edgesToValidate.push({
- id: id || "unknown",
- source: cell.getAttribute("source"),
- target: cell.getAttribute("target"),
- })
- }
- })
-
- // Return errors in priority order
- if (nestedCells.length > 0) {
- return `Invalid XML: Found nested mxCell elements (IDs: ${nestedCells.slice(0, 3).join(", ")}). All mxCell elements must be direct children of , never nested inside other mxCell elements. Please regenerate the diagram with correct structure.`
+ } catch {
+ // If DOMParser fails, continue with regex checks
}
+ // 1. Check for CDATA wrapper (invalid at document root)
+ if (/^\s* from end"
+ }
+
+ // 2. Check for duplicate structural attributes in tags
+ const structuralAttrs = new Set([
+ "edge",
+ "parent",
+ "source",
+ "target",
+ "vertex",
+ "connectable",
+ ])
+ const tagPattern = /<[^>]+>/g
+ let tagMatch
+ while ((tagMatch = tagPattern.exec(xml)) !== null) {
+ const tag = tagMatch[0]
+ const attrPattern = /\s([a-zA-Z_:][a-zA-Z0-9_:.-]*)\s*=/g
+ const attributes = new Map()
+ let attrMatch
+ while ((attrMatch = attrPattern.exec(tag)) !== null) {
+ const attrName = attrMatch[1]
+ attributes.set(attrName, (attributes.get(attrName) || 0) + 1)
+ }
+ const duplicates = Array.from(attributes.entries())
+ .filter(([name, count]) => count > 1 && structuralAttrs.has(name))
+ .map(([name]) => name)
+ if (duplicates.length > 0) {
+ return `Invalid XML: Duplicate structural attribute(s): ${duplicates.join(", ")}. Remove duplicate attributes.`
+ }
+ }
+
+ // 3. Check for unescaped < in attribute values
+ const attrValuePattern = /=\s*"([^"]*)"/g
+ let attrValMatch
+ while ((attrValMatch = attrValuePattern.exec(xml)) !== null) {
+ const value = attrValMatch[1]
+ if (/()
+ let idMatch
+ while ((idMatch = idPattern.exec(xml)) !== null) {
+ const id = idMatch[1]
+ ids.set(id, (ids.get(id) || 0) + 1)
+ }
+ const duplicateIds = Array.from(ids.entries())
+ .filter(([, count]) => count > 1)
+ .map(([id, count]) => `'${id}' (${count}x)`)
if (duplicateIds.length > 0) {
- return `Invalid XML: Found duplicate cell IDs (${duplicateIds.slice(0, 3).join(", ")}). Each mxCell must have a unique ID. Please regenerate the diagram with unique IDs for all elements.`
+ return `Invalid XML: Found duplicate ID(s): ${duplicateIds.slice(0, 3).join(", ")}. All id attributes must be unique.`
}
- if (orphanCells.length > 0) {
- return `Invalid XML: Found cells without parent attribute (IDs: ${orphanCells.slice(0, 3).join(", ")}). All mxCell elements (except id="0") must have a parent attribute. Please regenerate the diagram with proper parent references.`
- }
+ // 5. Check for tag mismatches using stateful parser
+ const xmlWithoutComments = xml.replace(//g, "")
+ const tagStack: string[] = []
- // Validate parent references (now that all IDs are collected)
- const badParents = invalidParents.filter((p) => !cellIds.has(p.parent))
- if (badParents.length > 0) {
- const details = badParents
- .slice(0, 3)
- .map((p) => `${p.id} (parent: ${p.parent})`)
- .join(", ")
- return `Invalid XML: Found cells with invalid parent references (${details}). Parent IDs must reference existing cells. Please regenerate the diagram with valid parent references.`
- }
+ // Parse tags properly by handling quoted strings
+ let i = 0
+ while (i < xmlWithoutComments.length) {
+ // Find next <
+ const tagStart = xmlWithoutComments.indexOf("<", i)
+ if (tagStart === -1) break
- // Validate edge connections
- const invalidConnections: string[] = []
- edgesToValidate.forEach((edge) => {
- if (edge.source && !cellIds.has(edge.source)) {
- invalidConnections.push(`${edge.id} (source: ${edge.source})`)
- }
- if (edge.target && !cellIds.has(edge.target)) {
- invalidConnections.push(`${edge.id} (target: ${edge.target})`)
- }
- })
-
- if (invalidConnections.length > 0) {
- return `Invalid XML: Found edges with invalid source/target references (${invalidConnections.slice(0, 3).join(", ")}). Edge source and target must reference existing cell IDs. Please regenerate the diagram with valid edge connections.`
- }
-
- // Check for orphaned mxPoint elements (not inside and without 'as' attribute)
- // These cause "Could not add object mxPoint" errors in draw.io
- const allMxPoints = doc.querySelectorAll("mxPoint")
- const orphanedMxPoints: string[] = []
- allMxPoints.forEach((point) => {
- const hasAsAttr = point.hasAttribute("as")
- const parentIsArray =
- point.parentElement?.tagName === "Array" &&
- point.parentElement?.getAttribute("as") === "points"
-
- if (!hasAsAttr && !parentIsArray) {
- // Find the parent mxCell to report which edge has the problem
- let parent = point.parentElement
- while (parent && parent.tagName !== "mxCell") {
- parent = parent.parentElement
+ // Find matching > by tracking quotes
+ let tagEnd = tagStart + 1
+ let inQuote = false
+ let quoteChar = ""
+ while (tagEnd < xmlWithoutComments.length) {
+ const c = xmlWithoutComments[tagEnd]
+ if (inQuote) {
+ if (c === quoteChar) inQuote = false
+ } else {
+ if (c === '"' || c === "'") {
+ inQuote = true
+ quoteChar = c
+ } else if (c === ">") {
+ break
+ }
}
- const cellId = parent?.getAttribute("id") || "unknown"
- if (!orphanedMxPoints.includes(cellId)) {
- orphanedMxPoints.push(cellId)
+ tagEnd++
+ }
+
+ if (tagEnd >= xmlWithoutComments.length) break
+
+ const tag = xmlWithoutComments.substring(tagStart, tagEnd + 1)
+ i = tagEnd + 1
+
+ // Parse the tag
+ const tagMatch = /^<(\/?)([a-zA-Z][a-zA-Z0-9:_-]*)/.exec(tag)
+ if (!tagMatch) continue
+
+ const isClosing = tagMatch[1] === "/"
+ const tagName = tagMatch[2]
+ const isSelfClosing = tag.endsWith("/>")
+
+ if (isClosing) {
+ if (tagStack.length === 0) {
+ return `Invalid XML: Closing tag ${tagName}> without matching opening tag`
+ }
+ const expected = tagStack.pop()
+ if (expected?.toLowerCase() !== tagName.toLowerCase()) {
+ return `Invalid XML: Expected closing tag ${expected}> but found ${tagName}>`
+ }
+ } else if (!isSelfClosing) {
+ tagStack.push(tagName)
+ }
+ }
+ if (tagStack.length > 0) {
+ return `Invalid XML: Document has ${tagStack.length} unclosed tag(s): ${tagStack.join(", ")}`
+ }
+
+ // 6. Check invalid character references
+ const charRefPattern = /?[^;]+;?/g
+ let charMatch
+ while ((charMatch = charRefPattern.exec(xml)) !== null) {
+ const ref = charMatch[0]
+ if (ref.startsWith("")) {
+ if (!ref.endsWith(";")) {
+ return `Invalid XML: Missing semicolon after hex reference: ${ref}`
+ }
+ const hexDigits = ref.substring(3, ref.length - 1)
+ if (hexDigits.length === 0 || !/^[0-9a-fA-F]+$/.test(hexDigits)) {
+ return `Invalid XML: Invalid hex character reference: ${ref}`
+ }
+ } else if (ref.startsWith("")) {
+ if (!ref.endsWith(";")) {
+ return `Invalid XML: Missing semicolon after decimal reference: ${ref}`
+ }
+ const decDigits = ref.substring(2, ref.length - 1)
+ if (decDigits.length === 0 || !/^[0-9]+$/.test(decDigits)) {
+ return `Invalid XML: Invalid decimal character reference: ${ref}`
}
}
- })
+ }
- if (orphanedMxPoints.length > 0) {
- return `Invalid XML: Found orphaned mxPoint elements in cells (${orphanedMxPoints.slice(0, 3).join(", ")}). mxPoint elements must either have an 'as' attribute (e.g., as="sourcePoint") or be inside . For edge waypoints, use: . Please fix the mxPoint structure.`
+ // 7. Check for invalid comment syntax (-- inside comments)
+ const commentPattern = //g
+ let commentMatch
+ while ((commentMatch = commentPattern.exec(xml)) !== null) {
+ if (/--/.test(commentMatch[1])) {
+ return "Invalid XML: Comment contains -- (double hyphen) which is not allowed"
+ }
+ }
+
+ // 8. Check for unescaped entity references and invalid entity names
+ const bareAmpPattern = /&(?!(?:lt|gt|amp|quot|apos|#))/g
+ if (bareAmpPattern.test(xmlWithoutComments)) {
+ return "Invalid XML: Found unescaped & character(s). Replace & with &"
+ }
+ const invalidEntityPattern = /&([a-zA-Z][a-zA-Z0-9]*);/g
+ const validEntities = new Set(["lt", "gt", "amp", "quot", "apos"])
+ let entityMatch
+ while (
+ (entityMatch = invalidEntityPattern.exec(xmlWithoutComments)) !== null
+ ) {
+ if (!validEntities.has(entityMatch[1])) {
+ return `Invalid XML: Invalid entity reference: &${entityMatch[1]}; - use only valid XML entities (lt, gt, amp, quot, apos)`
+ }
+ }
+
+ // 9. Check for empty id attributes on mxCell
+ if (/]*\sid\s*=\s*["']\s*["'][^>]*>/g.test(xml)) {
+ return "Invalid XML: Found mxCell element(s) with empty id attribute"
+ }
+
+ // 10. Check for mxfile wrapper (warning only - may not work with URL hash loading)
+ // Disabled: This is just a warning, not an error
+ // if (xml.trim().startsWith(']*>/g
+ const cellStack: number[] = []
+ let cellMatch
+ while ((cellMatch = cellTagPattern.exec(xml)) !== null) {
+ const tag = cellMatch[0]
+ if (tag.startsWith("")) {
+ if (cellStack.length > 0) cellStack.pop()
+ } else if (!tag.endsWith("/>")) {
+ const isLabelOrGeometry =
+ /\sas\s*=\s*["'](valueLabel|geometry)["']/.test(tag)
+ if (!isLabelOrGeometry) {
+ cellStack.push(cellMatch.index)
+ if (cellStack.length > 1) {
+ return "Invalid XML: Found nested mxCell tags. Cells should be siblings, not nested inside other mxCell elements."
+ }
+ }
+ }
}
return null
}
+/**
+ * Attempts to auto-fix common XML issues in draw.io diagrams
+ * @param xml - The XML string to fix
+ * @returns Object with fixed XML and list of fixes applied
+ */
+export function autoFixXml(xml: string): { fixed: string; fixes: string[] } {
+ let fixed = xml
+ const fixes: string[] = []
+
+ // 1. Remove CDATA wrapper
+ if (/^\s*\s*$/, "")
+ fixes.push("Removed CDATA wrapper")
+ }
+
+ // 2. Fix duplicate attributes (keep first occurrence, remove duplicates)
+ const structuralAttrsToFix = [
+ "edge",
+ "parent",
+ "source",
+ "target",
+ "vertex",
+ "connectable",
+ ]
+ let dupAttrFixed = false
+ fixed = fixed.replace(/<[^>]+>/g, (tag) => {
+ const seenAttrs = new Set()
+ let newTag = tag
+
+ for (const attr of structuralAttrsToFix) {
+ // Find all occurrences of this attribute
+ const attrRegex = new RegExp(
+ `\\s${attr}\\s*=\\s*["'][^"']*["']`,
+ "gi",
+ )
+ const matches = tag.match(attrRegex)
+
+ if (matches && matches.length > 1) {
+ // Keep first, remove others
+ let firstKept = false
+ newTag = newTag.replace(attrRegex, (m) => {
+ if (!firstKept) {
+ firstKept = true
+ return m
+ }
+ dupAttrFixed = true
+ return ""
+ })
+ }
+ }
+ return newTag
+ })
+ if (dupAttrFixed) {
+ fixes.push("Removed duplicate structural attributes")
+ }
+
+ // 3. Fix unescaped & characters (but not valid entities)
+ // Match & not followed by valid entity pattern
+ const ampersandPattern =
+ /&(?!(?:lt|gt|amp|quot|apos|#[0-9]+|#x[0-9a-fA-F]+);)/g
+ if (ampersandPattern.test(fixed)) {
+ fixed = fixed.replace(
+ /&(?!(?:lt|gt|amp|quot|apos|#[0-9]+|#x[0-9a-fA-F]+);)/g,
+ "&",
+ )
+ fixes.push("Escaped unescaped & characters")
+ }
+
+ // 3a. Fix invalid entity names like &amp;quot; -> &quot;
+ // Common mistake: double-escaping
+ const invalidEntities = [
+ { pattern: /&quot;/g, replacement: """, name: "&quot;" },
+ { pattern: /&lt;/g, replacement: "<", name: "&lt;" },
+ { pattern: /&gt;/g, replacement: ">", name: "&gt;" },
+ { pattern: /&apos;/g, replacement: "'", name: "&apos;" },
+ { pattern: /&amp;/g, replacement: "&", name: "&amp;" },
+ ]
+ for (const { pattern, replacement, name } of invalidEntities) {
+ if (pattern.test(fixed)) {
+ fixed = fixed.replace(pattern, replacement)
+ fixes.push(`Fixed double-escaped entity ${name}`)
+ }
+ }
+
+ // 3b. Fix malformed attribute values where " is used as delimiter
+ // Pattern: attr="value" should be attr=""value""
+ const malformedQuotePattern =
+ /(\s[a-zA-Z][a-zA-Z0-9_:-]*)="([^&]*(?:&(?!quot;)[^&]*)*)"/g
+ if (malformedQuotePattern.test(fixed)) {
+ fixed = fixed.replace(
+ /(\s[a-zA-Z][a-zA-Z0-9_:-]*)="([^&]*(?:&(?!quot;)[^&]*)*)"/g,
+ '$1=""$2""',
+ )
+ fixes.push(
+ 'Fixed malformed attribute quotes (="..." to =""..."")',
+ )
+ }
+
+ // 4. Fix unescaped < in attribute values
+ // This is tricky - we need to find < inside quoted attribute values
+ const attrPattern = /(=\s*")([^"]*?)(<)([^"]*?)(")/g
+ let attrMatch
+ let hasUnescapedLt = false
+ while ((attrMatch = attrPattern.exec(fixed)) !== null) {
+ if (!attrMatch[3].startsWith("<")) {
+ hasUnescapedLt = true
+ break
+ }
+ }
+ if (hasUnescapedLt) {
+ // Replace < with &lt; inside attribute values
+ fixed = fixed.replace(/=\s*"([^"]*)"/g, (match, value) => {
+ const escaped = value.replace(/ {
+ if (/^[0-9a-fA-F]+$/.test(hex) && hex.length > 0) {
+ return match // Valid hex ref, keep it
+ }
+ invalidHexRefs.push(match)
+ return "" // Remove invalid ref
+ })
+ if (invalidHexRefs.length > 0) {
+ fixes.push(
+ `Removed ${invalidHexRefs.length} invalid hex character reference(s)`,
+ )
+ }
+
+ // 6. Fix invalid decimal character references
+ const invalidDecRefs: string[] = []
+ fixed = fixed.replace(/([^x][^;]*);/g, (match, dec) => {
+ if (/^[0-9]+$/.test(dec) && dec.length > 0) {
+ return match // Valid decimal ref, keep it
+ }
+ invalidDecRefs.push(match)
+ return "" // Remove invalid ref
+ })
+ if (invalidDecRefs.length > 0) {
+ fixes.push(
+ `Removed ${invalidDecRefs.length} invalid decimal character reference(s)`,
+ )
+ }
+
+ // 7. Fix invalid comment syntax (replace -- with - repeatedly until none left)
+ fixed = fixed.replace(//g, (match, content) => {
+ if (/--/.test(content)) {
+ // Keep replacing until no double hyphens remain
+ let fixedContent = content
+ while (/--/.test(fixedContent)) {
+ fixedContent = fixedContent.replace(/--/g, "-")
+ }
+ fixes.push("Fixed invalid comment syntax (removed double hyphens)")
+ return ``
+ }
+ return match
+ })
+
+ // 8. Fix <Cell> tags that should be <mxCell> (common LLM mistake)
+ // This handles both opening and closing tags
+ const hasCellTags = /<\/?Cell[\s>]/i.test(fixed)
+ if (hasCellTags) {
+ fixed = fixed.replace(//gi, "")
+ fixed = fixed.replace(/<\/Cell>/gi, "")
+ fixes.push("Fixed | tags to ")
+ }
+
+ // 9. Fix common closing tag typos
+ const tagTypos = [
+ { wrong: /<\/mxElement>/gi, right: "", name: "" },
+ { wrong: /<\/mxcell>/g, right: " | | ", name: "" }, // case sensitivity
+ {
+ wrong: /<\/mxgeometry>/g,
+ right: "",
+ name: "",
+ },
+ { wrong: /<\/mxpoint>/g, right: "", name: "" },
+ {
+ wrong: /<\/mxgraphmodel>/gi,
+ right: "",
+ name: "",
+ },
+ ]
+ for (const { wrong, right, name } of tagTypos) {
+ if (wrong.test(fixed)) {
+ fixed = fixed.replace(wrong, right)
+ fixes.push(`Fixed typo ${name} to ${right}`)
+ }
+ }
+
+ // 10. Fix unclosed tags by appending missing closing tags
+ // Track open tags and close any that are left open using stateful parser
+ const tagStack: string[] = []
+
+ let idx = 0
+ while (idx < fixed.length) {
+ const tagStart = fixed.indexOf("<", idx)
+ if (tagStart === -1) break
+
+ // Find matching > by tracking quotes
+ let tagEnd = tagStart + 1
+ let inQuote = false
+ let quoteChar = ""
+ while (tagEnd < fixed.length) {
+ const c = fixed[tagEnd]
+ if (inQuote) {
+ if (c === quoteChar) inQuote = false
+ } else {
+ if (c === '"' || c === "'") {
+ inQuote = true
+ quoteChar = c
+ } else if (c === ">") {
+ break
+ }
+ }
+ tagEnd++
+ }
+
+ if (tagEnd >= fixed.length) break
+
+ const tag = fixed.substring(tagStart, tagEnd + 1)
+ idx = tagEnd + 1
+
+ const tagMatch2 = /^<(\/?)([a-zA-Z][a-zA-Z0-9:_-]*)/.exec(tag)
+ if (!tagMatch2) continue
+
+ const isClosing = tagMatch2[1] === "/"
+ const tagName = tagMatch2[2]
+ const isSelfClosing = tag.endsWith("/>")
+
+ if (isClosing) {
+ // Find matching opening tag (may not be the last one if there's mismatch)
+ const lastIdx = tagStack.lastIndexOf(tagName)
+ if (lastIdx !== -1) {
+ tagStack.splice(lastIdx, 1)
+ }
+ } else if (!isSelfClosing) {
+ tagStack.push(tagName)
+ }
+ }
+
+ // If there are unclosed tags, append closing tags in reverse order
+ // But first verify with simple count that they're actually unclosed
+ if (tagStack.length > 0) {
+ const tagsToClose: string[] = []
+ for (const tagName of tagStack.reverse()) {
+ // Simple count check: only close if opens > closes
+ const openCount = (
+ fixed.match(new RegExp(`<${tagName}[\\s>]`, "gi")) || []
+ ).length
+ const closeCount = (
+ fixed.match(new RegExp(`${tagName}>`, "gi")) || []
+ ).length
+ if (openCount > closeCount) {
+ tagsToClose.push(tagName)
+ }
+ }
+ if (tagsToClose.length > 0) {
+ const closingTags = tagsToClose.map((t) => `${t}>`).join("\n")
+ fixed = fixed.trimEnd() + "\n" + closingTags
+ fixes.push(
+ `Closed ${tagsToClose.length} unclosed tag(s): ${tagsToClose.join(", ")}`,
+ )
+ }
+ }
+
+ // 11. Fix nested mxCell by flattening
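+ // Pattern A: an <mxCell> opening tag directly followed by another <mxCell> opening tag with the same id (duplicate ID)
+ // Pattern B: an <mxCell> opened inside a still-open <mxCell> with a different id (different ID - true nesting)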
+ const lines = fixed.split("\n")
+ let newLines: string[] = []
+ let nestedFixed = 0
+ let extraClosingToRemove = 0
+
+ // First pass: fix duplicate ID nesting (same as before)
+ for (let i = 0; i < lines.length; i++) {
+ const line = lines[i]
+ const nextLine = lines[i + 1]
+
+ // Check if current line and next line are both mxCell opening tags with same ID
+ if (
+ nextLine &&
+ /") &&
+ !nextLine.includes("/>")
+ ) {
+ const id1 = line.match(/\bid\s*=\s*["']([^"']+)["']/)?.[1]
+ const id2 = nextLine.match(/\bid\s*=\s*["']([^"']+)["']/)?.[1]
+
+ if (id1 && id1 === id2) {
+ nestedFixed++
+ extraClosingToRemove++ // Need to remove one later
+ continue // Skip this duplicate opening line
+ }
+ }
+
+ // Remove extra </mxCell> if we have pending removals
+ if (extraClosingToRemove > 0 && /^\s*<\/mxCell>\s*$/.test(line)) {
+ extraClosingToRemove--
+ continue // Skip this closing tag
+ }
+
+ newLines.push(line)
+ }
+
+ if (nestedFixed > 0) {
+ fixed = newLines.join("\n")
+ fixes.push(`Flattened ${nestedFixed} duplicate-ID nested mxCell(s)`)
+ }
+
+ // Second pass: fix true nesting (different IDs)
+ // Insert </mxCell> before nested child to close parent
+ const lines2 = fixed.split("\n")
+ newLines = []
+ let trueNestedFixed = 0
+ let cellDepth = 0
+ let pendingCloseRemoval = 0
+
+ for (let i = 0; i < lines2.length; i++) {
+ const line = lines2[i]
+ const trimmed = line.trim()
+
+ // Track mxCell depth
+ const isOpenCell = /")
+ const isCloseCell = trimmed === ""
+ const isSelfClose = /]*\/>/.test(trimmed)
+
+ if (isOpenCell) {
+ if (cellDepth > 0) {
+ // Found nested cell - insert closing tag for parent before this line
+ const indent = line.match(/^(\s*)/)?.[1] || ""
+ newLines.push(indent + "")
+ trueNestedFixed++
+ pendingCloseRemoval++ // Need to remove one later
+ }
+ cellDepth = 1 // Reset to 1 since we just opened a new cell
+ newLines.push(line)
+ } else if (isCloseCell) {
+ if (pendingCloseRemoval > 0) {
+ pendingCloseRemoval--
+ // Skip this extra closing tag
+ } else {
+ cellDepth = Math.max(0, cellDepth - 1)
+ newLines.push(line)
+ }
+ } else {
+ newLines.push(line)
+ }
+ }
+
+ if (trueNestedFixed > 0) {
+ fixed = newLines.join("\n")
+ fixes.push(`Fixed ${trueNestedFixed} true nested mxCell(s)`)
+ }
+
+ // 12. Fix duplicate IDs by appending suffix
+ const idPattern = /\bid\s*=\s*["']([^"']+)["']/gi
+ const seenIds = new Map()
+ const duplicateIds: string[] = []
+
+ // First pass: find duplicates
+ let idMatch
+ const tempPattern = /\bid\s*=\s*["']([^"']+)["']/gi
+ while ((idMatch = tempPattern.exec(fixed)) !== null) {
+ const id = idMatch[1]
+ seenIds.set(id, (seenIds.get(id) || 0) + 1)
+ }
+
+ // Find which IDs are duplicated
+ for (const [id, count] of seenIds) {
+ if (count > 1) duplicateIds.push(id)
+ }
+
+ // Second pass: rename duplicates (keep first occurrence, rename others)
+ if (duplicateIds.length > 0) {
+ const idCounters = new Map()
+ fixed = fixed.replace(/\bid\s*=\s*["']([^"']+)["']/gi, (match, id) => {
+ if (!duplicateIds.includes(id)) return match
+
+ const count = idCounters.get(id) || 0
+ idCounters.set(id, count + 1)
+
+ if (count === 0) return match // Keep first occurrence
+
+ // Rename subsequent occurrences
+ const newId = `${id}_dup${count}`
+ return match.replace(id, newId)
+ })
+ fixes.push(`Renamed ${duplicateIds.length} duplicate ID(s)`)
+ }
+
+ // 13. Fix empty id attributes by generating unique IDs
+ let emptyIdCount = 0
+ fixed = fixed.replace(
+ /]*)\sid\s*=\s*["']\s*["']([^>]*)>/g,
+ (match, before, after) => {
+ emptyIdCount++
+ const newId = `cell_${Date.now()}_${emptyIdCount}`
+ return ``
+ },
+ )
+ if (emptyIdCount > 0) {
+ fixes.push(`Generated ${emptyIdCount} missing ID(s)`)
+ }
+
+ return { fixed, fixes }
+}
+
+/**
+ * Validates draw.io XML with validateMxCellStructure; only when that fails, runs autoFixXml once and validates the repaired text again
+ * @param xml - The XML string to validate and, if invalid, repair
+ * @returns valid/error reflect the last validation run; fixed holds the repaired XML only when a repair was needed and passed (null otherwise); fixes is what autoFixXml reported ([] when the input was already valid)
+ */
+export function validateAndFixXml(xml: string): {
+ valid: boolean
+ error: string | null
+ fixed: string | null
+ fixes: string[]
+} {
+ // Validate the input as-is; already-valid XML is returned untouched (fixed: null, no fixes)
+ let error = validateMxCellStructure(xml)
+
+ if (!error) {
+ return { valid: true, error: null, fixed: null, fixes: [] }
+ }
+
+ // Input is invalid: let autoFixXml attempt its repairs
+ const { fixed, fixes } = autoFixXml(xml)
+
+ // Re-validate the repaired text; error now describes the repaired XML, not the original input
+ error = validateMxCellStructure(fixed)
+
+ if (!error) {
+ return { valid: true, error: null, fixed, fixes }
+ }
+
+ // Still invalid after fixes: report the remaining error and the fixes that were tried, but withhold the partially repaired XML
+ return { valid: false, error, fixed: null, fixes }
+}
+
export function extractDiagramXML(xml_svg_string: string): string {
try {
// 1. Parse the SVG string (using built-in DOMParser in a browser-like environment)
|