fix: improve XML auto-fix with malformed quote pattern

- Fix the =&quot;...&quot; pattern, where the &quot; entity was used as the attribute delimiter instead of actual quote characters
- Common in dashPattern attributes, e.g. dashPattern=&quot;1 1;&quot;
This commit is contained in:
dayuan.jiang
2025-12-13 20:52:44 +09:00
parent 39c0497762
commit ecb23be112

View File

@@ -866,18 +866,15 @@ export function autoFixXml(xml: string): { fixed: string; fixes: string[] } {
let fixed = xml let fixed = xml
const fixes: string[] = [] const fixes: string[] = []
// 0. Fix backslash-escaped quotes (common LLM mistakes) // 0. Fix JSON-escaped XML (common when XML is stored in JSON without unescaping)
// Handles: attr=\"value\", value="text\"inner\"more", and mixed patterns // Only apply when we see JSON-escaped attribute patterns like =\"value\"
// Uses backreference to match opening/closing quote style, then normalizes // Don't apply to legitimate \n in value attributes (draw.io uses these for line breaks)
if (/\\"/.test(fixed)) { if (/=\\"/.test(fixed)) {
fixed = fixed.replace( // Replace literal \" with actual quotes
/(\s[\w:-]+)\s*=\s*(\\"|")([\s\S]*?)\2(?=[\s/>?]|$)/g, fixed = fixed.replace(/\\"/g, '"')
(_match, attrName, _openQuote, content) => { // Replace literal \n with actual newlines (only after confirming JSON-escaped)
const cleanContent = content.replace(/\\"/g, """) fixed = fixed.replace(/\\n/g, "\n")
return `${attrName}="${cleanContent}"` fixes.push("Fixed JSON-escaped XML")
},
)
fixes.push("Fixed backslash-escaped quotes")
} }
// 1. Remove CDATA wrapper (MUST be before text-before-root check) // 1. Remove CDATA wrapper (MUST be before text-before-root check)
@@ -953,20 +950,44 @@ export function autoFixXml(xml: string): { fixed: string; fixes: string[] } {
} }
} }
// 3b. Fix malformed attribute values where &quot; is used as delimiter // 3b. Fix malformed attribute values where &quot; is used as delimiter instead of actual quotes
// Pattern: attr=&quot;value&quot; should be attr="&quot;value&quot;" // Pattern: attr=&quot;value&quot; should become attr="value" (the &quot; was meant to be the quote delimiter)
const malformedQuotePattern = // This commonly happens with dashPattern="1 1;"
/(\s[a-zA-Z][a-zA-Z0-9_:-]*)="([^&]*(?:&(?!quot;)[^&]*)*)"/g const malformedQuotePattern = /(\s[a-zA-Z][a-zA-Z0-9_:-]*)="/
if (malformedQuotePattern.test(fixed)) { if (malformedQuotePattern.test(fixed)) {
// Replace =&quot; with =" and the trailing &quot; (before the next attribute or tag end) with "
fixed = fixed.replace( fixed = fixed.replace(
/(\s[a-zA-Z][a-zA-Z0-9_:-]*)="([^&]*(?:&(?!quot;)[^&]*)*)"/g, /(\s[a-zA-Z][a-zA-Z0-9_:-]*)="([^&]*?)"/g,
'$1=""$2""', '$1="$2"',
) )
fixes.push( fixes.push(
'Fixed malformed attribute quotes (="..." to =""..."")', 'Fixed malformed attribute quotes (="..." to ="...")',
) )
} }
// 3c. Fix malformed closing tags like </tag/> -> </tag>
const malformedClosingTag = /<\/([a-zA-Z][a-zA-Z0-9]*)\s*\/>/g
if (malformedClosingTag.test(fixed)) {
fixed = fixed.replace(/<\/([a-zA-Z][a-zA-Z0-9]*)\s*\/>/g, "</$1>")
fixes.push("Fixed malformed closing tags (</tag/> to </tag>)")
}
// 3d. Fix missing space between attributes like vertex="1"parent="1"
const missingSpacePattern = /("[^"]*")([a-zA-Z][a-zA-Z0-9_:-]*=)/g
if (missingSpacePattern.test(fixed)) {
fixed = fixed.replace(/("[^"]*")([a-zA-Z][a-zA-Z0-9_:-]*=)/g, "$1 $2")
fixes.push("Added missing space between attributes")
}
// 3e. Fix unescaped quotes in style color values like fillColor="#fff2e6"
// The " after Color= prematurely ends the style attribute. Remove it.
// Pattern: ;fillColor="#fff → ;fillColor=#fff (remove first ", keep second as style closer)
const quotedColorPattern = /;([a-zA-Z]*[Cc]olor)="#/
if (quotedColorPattern.test(fixed)) {
fixed = fixed.replace(/;([a-zA-Z]*[Cc]olor)="#/g, ";$1=#")
fixes.push("Removed quotes around color values in style")
}
// 4. Fix unescaped < in attribute values // 4. Fix unescaped < in attribute values
// This is tricky - we need to find < inside quoted attribute values // This is tricky - we need to find < inside quoted attribute values
const attrPattern = /(=\s*")([^"]*?)(<)([^"]*?)(")/g const attrPattern = /(=\s*")([^"]*?)(<)([^"]*?)(")/g