// next-ai-draw-io/app/api/chat/route.ts
import {
APICallError,
convertToModelMessages,
createUIMessageStream,
createUIMessageStreamResponse,
InvalidToolInputError,
LoadAPIKeyError,
stepCountIs,
streamText,
} from "ai"
import fs from "fs/promises"
import { jsonrepair } from "jsonrepair"
import path from "path"
import { z } from "zod"
import { getAIModel, supportsPromptCaching } from "@/lib/ai-providers"
import { findCachedResponse } from "@/lib/cached-responses"
import {
checkAndIncrementRequest,
isQuotaEnabled,
recordTokenUsage,
} from "@/lib/dynamo-quota-manager"
import {
getTelemetryConfig,
setTraceInput,
setTraceOutput,
wrapWithObserve,
} from "@/lib/langfuse"
import { getSystemPrompt } from "@/lib/system-prompts"
export const maxDuration = 120
// File upload limits (must match client-side)
const MAX_FILE_SIZE = 2 * 1024 * 1024 // 2MB
const MAX_FILES = 5
// Helper function to validate file parts in messages
function validateFileParts(messages: any[]): {
valid: boolean
error?: string
} {
const lastMessage = messages[messages.length - 1]
const fileParts =
lastMessage?.parts?.filter((p: any) => p.type === "file") || []
if (fileParts.length > MAX_FILES) {
return {
valid: false,
error: `Too many files. Maximum ${MAX_FILES} allowed.`,
}
}
for (const filePart of fileParts) {
// Data URLs format: data:image/png;base64,<data>
// Base64 increases size by ~33%, so we check the decoded size
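// Worked example: a 2 MB file arrives as roughly 2.8 M base64 characters;
// Math.ceil(len * 3 / 4) recovers the decoded byte count, overestimating by
// at most the two '=' padding bytes, which is close enough for a limit check.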
if (filePart.url?.startsWith("data:")) {
const base64Data = filePart.url.split(",")[1]
if (base64Data) {
const sizeInBytes = Math.ceil((base64Data.length * 3) / 4)
if (sizeInBytes > MAX_FILE_SIZE) {
return {
valid: false,
error: `File exceeds ${MAX_FILE_SIZE / 1024 / 1024}MB limit.`,
}
}
}
}
}
return { valid: true }
}
// Helper function to check if diagram is minimal/empty
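// A freshly created draw.io document carries only the two mandatory root cells,
// <mxCell id="0"/> and <mxCell id="1" parent="0"/>; generated content starts at
// id="2", so its absence is a reasonable signal that the canvas is empty.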
function isMinimalDiagram(xml: string): boolean {
const stripped = xml.replace(/\s/g, "")
return !stripped.includes('id="2"')
}
// Helper function to replace historical tool call XML with placeholders
// This reduces token usage and forces LLM to rely on the current diagram XML (source of truth)
// Also fixes invalid/undefined inputs from interrupted streaming
function replaceHistoricalToolInputs(messages: any[]): any[] {
return messages.map((msg) => {
if (msg.role !== "assistant" || !Array.isArray(msg.content)) {
return msg
}
const replacedContent = msg.content
.map((part: any) => {
if (part.type === "tool-call") {
const toolName = part.toolName
// Fix invalid/undefined inputs from interrupted streaming
if (
!part.input ||
typeof part.input !== "object" ||
Object.keys(part.input).length === 0
) {
// Skip tool calls with invalid inputs entirely
return null
}
if (
toolName === "display_diagram" ||
toolName === "edit_diagram"
) {
return {
...part,
input: {
placeholder:
"[XML content replaced - see current diagram XML in system context]",
},
}
}
}
return part
})
.filter(Boolean) // Remove null entries (invalid tool calls)
return { ...msg, content: replacedContent }
})
}
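// Illustration (not executed): a historical part such as
//   { type: "tool-call", toolName: "display_diagram", input: { xml: "<mxCell .../>" } }
// is rewritten to
//   { type: "tool-call", toolName: "display_diagram", input: { placeholder: "[XML content replaced - ...]" } }
// and tool calls whose input was lost to interrupted streaming are dropped.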
// Helper function to create cached stream response
function createCachedStreamResponse(xml: string): Response {
const toolCallId = `cached-${Date.now()}`
const stream = createUIMessageStream({
execute: async ({ writer }) => {
writer.write({ type: "start" })
writer.write({
type: "tool-input-start",
toolCallId,
toolName: "display_diagram",
})
writer.write({
type: "tool-input-delta",
toolCallId,
inputTextDelta: xml,
})
writer.write({
type: "tool-input-available",
toolCallId,
toolName: "display_diagram",
input: { xml },
})
writer.write({ type: "finish" })
},
})
return createUIMessageStreamResponse({ stream })
}
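// The chunk sequence above (start -> tool-input-start -> tool-input-delta ->
// tool-input-available -> finish) mirrors what a live display_diagram tool call
// streams, so the client handles cached responses through the same code path.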
// Inner handler function
async function handleChatRequest(req: Request): Promise<Response> {
// Check for access code
const accessCodes =
process.env.ACCESS_CODE_LIST?.split(",")
.map((code) => code.trim())
.filter(Boolean) || []
if (accessCodes.length > 0) {
const accessCodeHeader = req.headers.get("x-access-code")
if (!accessCodeHeader || !accessCodes.includes(accessCodeHeader)) {
return Response.json(
{
error: "Invalid or missing access code. Please configure it in Settings.",
},
{ status: 401 },
)
}
}
const { messages, xml, previousXml, sessionId } = await req.json()
// Derive a pseudonymous user ID from the client IP for Langfuse tracking
const forwardedFor = req.headers.get("x-forwarded-for")
const rawIp = forwardedFor?.split(",")[0]?.trim() || "anonymous"
const userId =
rawIp === "anonymous"
? rawIp
: `user-${Buffer.from(rawIp).toString("base64url").slice(0, 8)}`
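// e.g. rawIp "1.2.3.4" yields "user-MS4yLjMu": the first 8 base64url characters
// of the raw IP, a stable pseudonym rather than a cryptographic hash.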
// Validate sessionId for Langfuse (must be string, max 200 chars)
const validSessionId =
sessionId && typeof sessionId === "string" && sessionId.length <= 200
? sessionId
: undefined
// Extract user input text for Langfuse trace
// Find the last USER message, not just the last message (which could be assistant in multi-step tool flows)
const lastUserMessage = [...messages]
.reverse()
.find((m: any) => m.role === "user")
const userInputText =
lastUserMessage?.parts?.find((p: any) => p.type === "text")?.text || ""
// Update Langfuse trace with input, session, and user
setTraceInput({
input: userInputText,
sessionId: validSessionId,
userId: userId,
})
// === SERVER-SIDE QUOTA CHECK START ===
// Quota is opt-in: only enabled when DYNAMODB_QUOTA_TABLE env var is set
const hasOwnApiKey = !!(
req.headers.get("x-ai-provider") && req.headers.get("x-ai-api-key")
)
// Skip quota check if: quota disabled, user has own API key, or is anonymous
if (isQuotaEnabled() && !hasOwnApiKey && userId !== "anonymous") {
const quotaCheck = await checkAndIncrementRequest(userId, {
requests: Number(process.env.DAILY_REQUEST_LIMIT) || 10,
tokens: Number(process.env.DAILY_TOKEN_LIMIT) || 200000,
tpm: Number(process.env.TPM_LIMIT) || 20000,
})
if (!quotaCheck.allowed) {
return Response.json(
{
error: quotaCheck.error,
type: quotaCheck.type,
used: quotaCheck.used,
limit: quotaCheck.limit,
},
{ status: 429 },
)
}
}
// === SERVER-SIDE QUOTA CHECK END ===
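// Sketch of the env configuration consumed above (the table name is a
// hypothetical example; the numbers are the code defaults):
//   DYNAMODB_QUOTA_TABLE=chat-quota   # presence enables quota enforcement
//   DAILY_REQUEST_LIMIT=10
//   DAILY_TOKEN_LIMIT=200000
//   TPM_LIMIT=20000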
// === FILE VALIDATION START ===
const fileValidation = validateFileParts(messages)
if (!fileValidation.valid) {
return Response.json({ error: fileValidation.error }, { status: 400 })
}
// === FILE VALIDATION END ===
// === CACHE CHECK START ===
const isFirstMessage = messages.length === 1
const isEmptyDiagram = !xml || xml.trim() === "" || isMinimalDiagram(xml)
if (isFirstMessage && isEmptyDiagram) {
const lastMessage = messages[0]
const textPart = lastMessage.parts?.find((p: any) => p.type === "text")
const filePart = lastMessage.parts?.find((p: any) => p.type === "file")
const cached = findCachedResponse(textPart?.text || "", !!filePart)
if (cached) {
return createCachedStreamResponse(cached.xml)
}
}
// === CACHE CHECK END ===
// Read client AI provider overrides from headers
const clientOverrides = {
provider: req.headers.get("x-ai-provider"),
baseUrl: req.headers.get("x-ai-base-url"),
apiKey: req.headers.get("x-ai-api-key"),
modelId: req.headers.get("x-ai-model"),
feat: multi-provider model configuration with UI/UX improvements (#355) * feat: add multi-provider model configuration - Add model config dialog for managing multiple AI providers - Support for OpenAI, Anthropic, Google, Azure, Bedrock, OpenRouter, DeepSeek, SiliconFlow, Ollama, and AI Gateway - Add model selector dropdown in chat panel header - Add API key validation endpoint - Add custom model ID input with keyboard navigation - Fix hover highlight in Command component - Add suggested models for each provider including latest Claude 4.5 series - Store configuration locally in browser * feat: improve model config UI and move selector to chat input - Move model selector from header to chat input (left of send button) - Add per-model validation status (queued, running, valid, invalid) - Filter model selector to only show verified models - Add editable model IDs in config dialog - Add custom model input field alongside suggested models dropdown - Fix hover states on provider buttons and select triggers - Update OpenAI suggested models with GPT-5 series - Add alert-dialog component for delete confirmation * refactor: revert shadcn component changes, apply hover fix at usage site * feat: add AWS credentials support for Bedrock provider - Add AWS Access Key ID, Secret Access Key, Region fields for Bedrock - Show different credential fields based on provider type - Update validation API to handle Bedrock with AWS credentials - Add region selector with common AWS regions * fix: reset Test button after validation completes * fix: reset validation button to Test after success * fix: complete bedrock support and UI/UX improvements - Add bedrock to ALLOWED_CLIENT_PROVIDERS for client credentials - Pass AWS credentials through full chain (headers → API → provider) - Replace non-existent GPT-5 models with real ones (o1, o3-mini) - Add accessibility: aria-labels, focus-visible rings, inline errors - Add more AWS regions (Ohio, London, Paris, Mumbai, Seoul, São Paulo) - Fix setTimeout cleanup with useRef on component unmount - Fix TypeScript type consistency in getSelectedAIConfig fallback * chore: remove unused code - Remove unused setAccessCodeRequired state in chat-panel.tsx - Remove unused getSelectedModel export in model-config.ts * fix: UI/UX improvements for model configuration dialog - Add gradient header styling with icon badge - Change Configuration section icon from Key to Settings2 - Add duplicate model detection with warning banner and inline removal - Filter out already-added models from suggestions dropdown - Add type-to-confirm for deleting providers with 3+ models - Enhance delete confirmation dialog with warning icon - Improve model selector discoverability (show model name + chevron) - Add truncation for long model names with title tooltip - Remove AI provider settings from Settings dialog (now in Model Config) - Extract ValidationButton into reusable component * fix: prevent duplicate model IDs within same provider - Block adding model if ID already exists in provider - Block editing model ID to match existing model in provider * fix: improve duplicate model ID notifications - Add toast notification when trying to add duplicate model - Allow free typing when editing model ID, validate on blur - Show warning toast instead of blocking input * fix: improve duplicate model validation UX in config dialog - Add inline error display for duplicate model IDs - Show red border on input when error exists - Validate on blur with shake animation for edit errors - Prevent saving empty model names - 
Clear errors when user starts typing - Simplify error styling (small red text, no heavy chips)
2025-12-22 22:36:36 +09:00
// AWS Bedrock credentials
awsAccessKeyId: req.headers.get("x-aws-access-key-id"),
awsSecretAccessKey: req.headers.get("x-aws-secret-access-key"),
awsRegion: req.headers.get("x-aws-region"),
awsSessionToken: req.headers.get("x-aws-session-token"),
}
// Read minimal style preference from header
const minimalStyle = req.headers.get("x-minimal-style") === "true"
// Get AI model with optional client overrides
const { model, providerOptions, headers, modelId } =
getAIModel(clientOverrides)
// Check if model supports prompt caching
const shouldCache = supportsPromptCaching(modelId)
console.log(
`[Prompt Caching] ${shouldCache ? "ENABLED" : "DISABLED"} for model: ${modelId}`,
)
// Get the appropriate system prompt based on model (extended for Opus/Haiku 4.5)
const systemMessage = getSystemPrompt(modelId, minimalStyle)
// Extract file parts (images) from the last user message
const fileParts =
lastUserMessage?.parts?.filter((part: any) => part.type === "file") ||
[]
// User input only - XML is now in a separate cached system message
const formattedUserInput = `User input:
"""md
${userInputText}
"""`
// Convert UIMessages to ModelMessages and add system message
const modelMessages = await convertToModelMessages(messages)
// DEBUG: Log incoming messages structure
console.log("[route.ts] Incoming messages count:", messages.length)
messages.forEach((msg: any, idx: number) => {
console.log(
`[route.ts] Message ${idx} role:`,
msg.role,
"parts count:",
msg.parts?.length,
)
if (msg.parts) {
msg.parts.forEach((part: any, partIdx: number) => {
if (
part.type === "tool-invocation" ||
part.type === "tool-result"
) {
console.log(`[route.ts] Part ${partIdx}:`, {
type: part.type,
toolName: part.toolName,
hasInput: !!part.input,
inputType: typeof part.input,
inputKeys:
part.input && typeof part.input === "object"
? Object.keys(part.input)
: null,
})
}
})
}
})
// Replace historical tool call XML with placeholders to reduce tokens
// Disabled by default - some models (e.g. minimax) copy placeholders instead of generating XML
const enableHistoryReplace =
process.env.ENABLE_HISTORY_XML_REPLACE === "true"
const placeholderMessages = enableHistoryReplace
? replaceHistoricalToolInputs(modelMessages)
: modelMessages
// Filter out messages with empty content arrays (Bedrock API rejects these)
// This is a safety measure - ideally convertToModelMessages should handle all cases
let enhancedMessages = placeholderMessages.filter(
(msg: any) =>
msg.content && Array.isArray(msg.content) && msg.content.length > 0,
)
// Filter out tool-calls with invalid inputs (from failed repair or interrupted streaming)
// Bedrock API rejects messages where toolUse.input is not a valid JSON object
enhancedMessages = enhancedMessages
.map((msg: any) => {
if (msg.role !== "assistant" || !Array.isArray(msg.content)) {
return msg
}
const filteredContent = msg.content.filter((part: any) => {
if (part.type === "tool-call") {
// Check if input is a valid object (not null, undefined, or empty)
if (
!part.input ||
typeof part.input !== "object" ||
Object.keys(part.input).length === 0
) {
console.warn(
`[route.ts] Filtering out tool-call with invalid input:`,
{ toolName: part.toolName, input: part.input },
)
return false
}
}
return true
})
return { ...msg, content: filteredContent }
})
.filter((msg: any) => msg.content && msg.content.length > 0)
// DEBUG: Log modelMessages structure (what's being sent to AI)
console.log("[route.ts] Model messages count:", enhancedMessages.length)
enhancedMessages.forEach((msg: any, idx: number) => {
console.log(
`[route.ts] ModelMsg ${idx} role:`,
msg.role,
"content count:",
msg.content?.length,
)
if (msg.content) {
msg.content.forEach((part: any, partIdx: number) => {
if (part.type === "tool-call" || part.type === "tool-result") {
console.log(`[route.ts] Content ${partIdx}:`, {
type: part.type,
toolName: part.toolName,
hasInput: !!part.input,
inputType: typeof part.input,
inputValue:
part.input === undefined
? "undefined"
: part.input === null
? "null"
: "object",
})
}
})
}
})
// Update the last message with user input only (XML moved to separate cached system message)
if (enhancedMessages.length >= 1) {
const lastModelMessage = enhancedMessages[enhancedMessages.length - 1]
if (lastModelMessage.role === "user") {
// Build content array with user input text and file parts
const contentParts: any[] = [
{ type: "text", text: formattedUserInput },
]
// Add image parts back
for (const filePart of fileParts) {
contentParts.push({
type: "image",
image: filePart.url,
mimeType: filePart.mediaType,
})
}
enhancedMessages = [
...enhancedMessages.slice(0, -1),
{ ...lastModelMessage, content: contentParts },
]
}
}
// Add cache point to the last assistant message in conversation history
// This caches the entire conversation prefix for subsequent requests
// Strategy: system (cached) + history with last assistant (cached) + new user message
if (shouldCache && enhancedMessages.length >= 2) {
// Find the last assistant message (should be second-to-last, before current user message)
for (let i = enhancedMessages.length - 2; i >= 0; i--) {
if (enhancedMessages[i].role === "assistant") {
enhancedMessages[i] = {
...enhancedMessages[i],
providerOptions: {
bedrock: { cachePoint: { type: "default" } },
},
}
break // Only cache the last assistant message
}
}
}
// System messages with multiple cache breakpoints for optimal caching:
// - Breakpoint 1: Static instructions (~1500 tokens) - rarely changes
// - Breakpoint 2: Current XML context - changes per diagram, but constant within a conversation turn
// This allows: if only user message changes, both system caches are reused
// if XML changes, instruction cache is still reused
const systemMessages = [
// Cache breakpoint 1: Instructions (rarely change)
{
role: "system" as const,
content: systemMessage,
...(shouldCache && {
providerOptions: {
bedrock: { cachePoint: { type: "default" } },
},
}),
},
// Cache breakpoint 2: Previous and Current diagram XML context
{
role: "system" as const,
content: `${previousXml ? `Previous diagram XML (before user's last message):\n"""xml\n${previousXml}\n"""\n\n` : ""}Current diagram XML (AUTHORITATIVE - the source of truth):\n"""xml\n${xml || ""}\n"""\n\nIMPORTANT: The "Current diagram XML" is the SINGLE SOURCE OF TRUTH for what's on the canvas right now. The user can manually add, delete, or modify shapes directly in draw.io. Always count and describe elements based on the CURRENT XML, not on what you previously generated. If both previous and current XML are shown, compare them to understand what the user changed. When using edit_diagram, COPY search patterns exactly from the CURRENT XML - attribute order matters!`,
...(shouldCache && {
providerOptions: {
bedrock: { cachePoint: { type: "default" } },
},
}),
},
]
const allMessages = [...systemMessages, ...enhancedMessages]
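// Resulting request layout when caching is active (sketch for a multi-turn
// Bedrock conversation):
//   system: instructions            <- cachePoint 1
//   system: previous/current XML    <- cachePoint 2
//   ...history, last assistant turn <- cachePoint 3
//   user: formatted input + images  (uncached, changes every request)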
const result = streamText({
model,
...(process.env.MAX_OUTPUT_TOKENS && {
maxOutputTokens: parseInt(process.env.MAX_OUTPUT_TOKENS, 10),
}),
stopWhen: stepCountIs(5),
// Repair truncated tool calls when maxOutputTokens is reached mid-JSON
experimental_repairToolCall: async ({ toolCall, error }) => {
// DEBUG: Log what we're trying to repair
console.log(`[repairToolCall] Tool: ${toolCall.toolName}`)
console.log(
`[repairToolCall] Error: ${error.name} - ${error.message}`,
)
console.log(`[repairToolCall] Input type: ${typeof toolCall.input}`)
console.log(`[repairToolCall] Input value:`, toolCall.input)
// Only attempt repair for invalid tool input (broken JSON from truncation)
if (
error instanceof InvalidToolInputError ||
error.name === "AI_InvalidToolInputError"
) {
try {
// Pre-process to fix common LLM JSON errors that jsonrepair can't handle
let inputToRepair = toolCall.input
if (typeof inputToRepair === "string") {
// Fix `:=` instead of `: ` (LLM sometimes generates this)
inputToRepair = inputToRepair.replace(/:=/g, ": ")
// Fix `= "` instead of `: "`
inputToRepair = inputToRepair.replace(/=\s*"/g, ': "')
}
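// Illustration (not executed): a call truncated at maxOutputTokens may arrive as
//   {"xml": "<mxCell id=\"2\" value=\"Start
// jsonrepair closes the dangling string and object, producing valid JSON whose
// partial XML the client can then continue via append_diagram.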
// Use jsonrepair to fix truncated JSON
const repairedInput = jsonrepair(inputToRepair)
console.log(
`[repairToolCall] Repaired truncated JSON for tool: ${toolCall.toolName}`,
)
return { ...toolCall, input: repairedInput }
} catch (repairError) {
console.warn(
`[repairToolCall] Failed to repair JSON for tool: ${toolCall.toolName}`,
repairError,
)
// Return a placeholder input to avoid API errors in multi-step
// The tool will fail gracefully on client side
if (toolCall.toolName === "edit_diagram") {
return {
...toolCall,
input: {
operations: [],
_error: "JSON repair failed - no operations to apply",
},
}
}
if (toolCall.toolName === "display_diagram") {
return {
...toolCall,
input: {
xml: "",
_error: "JSON repair failed - empty diagram",
},
}
}
return null
}
}
// Don't attempt to repair other errors (like NoSuchToolError)
return null
},
messages: allMessages,
...(providerOptions && { providerOptions }), // This now includes all reasoning configs
...(headers && { headers }),
// Langfuse telemetry config (returns undefined if not configured)
...(getTelemetryConfig({ sessionId: validSessionId, userId }) && {
experimental_telemetry: getTelemetryConfig({
sessionId: validSessionId,
userId,
}),
}),
onFinish: ({ text, usage }) => {
// AI SDK 6 telemetry auto-reports token usage on its spans
setTraceOutput(text)
// Record token usage for server-side quota tracking (if enabled)
if (
isQuotaEnabled() &&
!hasOwnApiKey &&
userId !== "anonymous" &&
usage
) {
const totalTokens =
(usage.inputTokens || 0) + (usage.outputTokens || 0)
recordTokenUsage(userId, totalTokens)
}
},
tools: {
// Client-side tool that will be executed on the client
display_diagram: {
description: `Display a diagram on draw.io. Pass ONLY the mxCell elements - wrapper tags and root cells are added automatically.
VALIDATION RULES (XML will be rejected if violated):
1. Generate ONLY mxCell elements - NO wrapper tags (<mxfile>, <mxGraphModel>, <root>)
2. Do NOT include root cells (id="0" or id="1") - they are added automatically
3. All mxCell elements must be siblings - never nested
4. Every mxCell needs a unique id (start from "2")
5. Every mxCell needs a valid parent attribute (use "1" for top-level)
6. Escape special chars in values: &lt; &gt; &amp; &quot;
Example (generate ONLY this - no wrapper tags):
<mxCell id="lane1" value="Frontend" style="swimlane;" vertex="1" parent="1">
<mxGeometry x="40" y="40" width="200" height="200" as="geometry"/>
</mxCell>
<mxCell id="step1" value="Step 1" style="rounded=1;" vertex="1" parent="lane1">
<mxGeometry x="20" y="60" width="160" height="40" as="geometry"/>
</mxCell>
<mxCell id="lane2" value="Backend" style="swimlane;" vertex="1" parent="1">
<mxGeometry x="280" y="40" width="200" height="200" as="geometry"/>
</mxCell>
<mxCell id="step2" value="Step 2" style="rounded=1;" vertex="1" parent="lane2">
<mxGeometry x="20" y="60" width="160" height="40" as="geometry"/>
</mxCell>
<mxCell id="edge1" style="edgeStyle=orthogonalEdgeStyle;endArrow=classic;" edge="1" parent="1" source="step1" target="step2">
<mxGeometry relative="1" as="geometry"/>
</mxCell>
Notes:
- For AWS diagrams, use **AWS 2025 icons**.
- For animated connectors, add "flowAnimation=1" to edge style.
`,
inputSchema: z.object({
xml: z
.string()
.describe("XML string to be displayed on draw.io"),
}),
},
edit_diagram: {
description: `Edit the current diagram by ID-based operations (update/add/delete cells).
Operations:
- update: Replace an existing cell by its id. Provide cell_id and complete new_xml.
- add: Add a new cell. Provide cell_id (new unique id) and new_xml.
- delete: Remove a cell by its id. Only cell_id is needed.
For update/add, new_xml must be a complete mxCell element including mxGeometry.
JSON ESCAPING: Every " inside new_xml MUST be escaped as \\". Example: id=\\"5\\" value=\\"Label\\"`,
inputSchema: z.object({
operations: z
.array(
z.object({
type: z
.enum(["update", "add", "delete"])
.describe("Operation type"),
cell_id: z
.string()
.describe(
"The id of the mxCell. Must match the id attribute in new_xml.",
),
new_xml: z
.string()
.optional()
.describe(
"Complete mxCell XML element (required for update/add)",
),
}),
)
.describe("Array of operations to apply"),
}),
},
append_diagram: {
description: `Continue generating diagram XML when previous display_diagram output was truncated due to length limits.
WHEN TO USE: Only call this tool after display_diagram was truncated (you'll see an error message about truncation).
CRITICAL INSTRUCTIONS:
1. Do NOT include any wrapper tags - just continue the mxCell elements
2. Continue from EXACTLY where your previous output stopped
3. Complete the remaining mxCell elements
4. If still truncated, call append_diagram again with the next fragment
Example: If previous output ended with '<mxCell id="x" style="rounded=1', continue with ';" vertex="1">...' and complete the remaining elements.`,
inputSchema: z.object({
xml: z
.string()
.describe(
"Continuation XML fragment to append (NO wrapper tags)",
),
}),
},
get_shape_library: {
description: `Get draw.io shape/icon library documentation with style syntax and shape names.
Available libraries:
- Cloud: aws4, azure2, gcp2, alibaba_cloud, openstack, salesforce
- Networking: cisco19, network, kubernetes, vvd, rack
- Business: bpmn, lean_mapping
- General: flowchart, basic, arrows2, infographic, sitemap
- UI/Mockups: android
- Enterprise: citrix, sap, mscae, atlassian
- Engineering: fluidpower, electrical, pid, cabinets, floorplan
- Icons: webicons
Call this tool to get shape names and usage syntax for a specific library.`,
inputSchema: z.object({
library: z
.string()
.describe(
"Library name (e.g., 'aws4', 'kubernetes', 'flowchart')",
),
}),
execute: async ({ library }) => {
// Sanitize input - prevent path traversal attacks
const sanitizedLibrary = library
.toLowerCase()
.replace(/[^a-z0-9_-]/g, "")
if (sanitizedLibrary !== library.toLowerCase()) {
return `Invalid library name "${library}". Use only letters, numbers, underscores, and hyphens.`
}
const baseDir = path.join(
process.cwd(),
"docs/shape-libraries",
)
const filePath = path.join(
baseDir,
`${sanitizedLibrary}.md`,
)
// Verify path stays within expected directory
const resolvedPath = path.resolve(filePath)
if (!resolvedPath.startsWith(path.resolve(baseDir))) {
return `Invalid library path.`
}
try {
const content = await fs.readFile(filePath, "utf-8")
return content
} catch (error) {
if (
(error as NodeJS.ErrnoException).code === "ENOENT"
) {
return `Library "${library}" not found. Available: aws4, azure2, gcp2, alibaba_cloud, cisco19, kubernetes, network, bpmn, flowchart, basic, arrows2, vvd, salesforce, citrix, sap, mscae, atlassian, fluidpower, electrical, pid, cabinets, floorplan, webicons, infographic, sitemap, android, lean_mapping, openstack, rack`
}
console.error(
`[get_shape_library] Error loading "${library}":`,
error,
)
return `Error loading library "${library}". Please try again.`
}
},
},
},
...(process.env.TEMPERATURE !== undefined && {
temperature: parseFloat(process.env.TEMPERATURE),
}),
})
return result.toUIMessageStreamResponse({
sendReasoning: true,
messageMetadata: ({ part }) => {
if (part.type === "finish") {
const usage = (part as any).totalUsage
// AI SDK 6 provides totalTokens directly
return {
totalTokens: usage?.totalTokens ?? 0,
finishReason: (part as any).finishReason,
}
}
return undefined
},
})
}
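// On the client, this metadata surfaces per message, e.g. as
// message.metadata?.totalTokens and message.metadata?.finishReason (a sketch;
// the exact accessor depends on the useChat wiring in chat-panel.tsx).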
// Helper to categorize errors and return appropriate response
function handleError(error: unknown): Response {
console.error("Error in chat route:", error)
const isDev = process.env.NODE_ENV === "development"
// Check for specific AI SDK error types
if (APICallError.isInstance(error)) {
return Response.json(
{
error: error.message,
...(isDev && {
details: error.responseBody,
stack: error.stack,
}),
},
{ status: error.statusCode || 500 },
)
}
if (LoadAPIKeyError.isInstance(error)) {
return Response.json(
{
error: "Authentication failed. Please check your API key.",
...(isDev && {
stack: error.stack,
}),
},
{ status: 401 },
)
}
// Fallback for other errors with safety filter
const message =
error instanceof Error ? error.message : "An unexpected error occurred"
const status = (error as any)?.statusCode || (error as any)?.status || 500
// Prevent leaking API keys, tokens, or other sensitive data
const lowerMessage = message.toLowerCase()
const safeMessage =
lowerMessage.includes("key") ||
lowerMessage.includes("token") ||
lowerMessage.includes("sig") ||
lowerMessage.includes("signature") ||
lowerMessage.includes("secret") ||
lowerMessage.includes("password") ||
lowerMessage.includes("credential")
? "Authentication failed. Please check your credentials."
: message
return Response.json(
{
error: safeMessage,
...(isDev && {
details: message,
stack: error instanceof Error ? error.stack : undefined,
}),
},
{ status },
)
}
// Wrap handler with error handling
async function safeHandler(req: Request): Promise<Response> {
try {
return await handleChatRequest(req)
} catch (error) {
return handleError(error)
}
}
// Wrap with Langfuse observe (if configured)
const observedHandler = wrapWithObserve(safeHandler)
export async function POST(req: Request) {
return observedHandler(req)
}