import {
  streamText,
  convertToModelMessages,
  createUIMessageStream,
  createUIMessageStreamResponse,
} from 'ai';
import { getAIModel } from '@/lib/ai-providers';
import { findCachedResponse } from '@/lib/cached-responses';
import { getSystemPrompt } from '@/lib/system-prompts';
import { z } from 'zod';

// NOTE(review): presumably the route's maximum execution time in seconds
// (framework route config) - confirm against the hosting platform's limits.
export const maxDuration = 300;

/** Model messages as produced by `convertToModelMessages`. */
type ModelMessages = ReturnType<typeof convertToModelMessages>;

/** The fields of a UI "file" part that this route reads. */
interface FilePartLike {
  url: string;
  mediaType?: string;
}

/**
 * Shape of the JSON body accepted by this route.
 *
 * `messages` must be a non-empty array; the individual messages are passed
 * through untouched because their shape is owned by the AI SDK client.
 * `xml` is the current diagram and may be omitted or null.
 */
const chatRequestSchema = z.object({
  messages: z.array(z.any()).min(1),
  xml: z.string().nullish(),
});

type ChatRequestBody = z.infer<typeof chatRequestSchema>;

/** Matches the `id` attribute of elements that represent diagram cells. */
const CELL_ID_PATTERN =
  /<(?:mxCell|object|UserObject)\b[^>]*?\sid\s*=\s*(["'])(.*?)\1/g;

/** Description of the `display_diagram` tool sent to the model. */
const DISPLAY_DIAGRAM_DESCRIPTION = `Display a diagram on draw.io. Pass the XML content inside <root> tags.

VALIDATION RULES (XML will be rejected if violated):
1. All mxCell elements must be DIRECT children of <root> - never nested
2. Every mxCell needs a unique id
3. Every mxCell (except id="0") needs a valid parent attribute
4. Edge source/target must reference existing cell IDs
5. Escape special chars in values: &lt; &gt; &amp; &quot;
6. Always start with: <mxCell id="0"/><mxCell id="1" parent="0"/>

Example with swimlanes and edges (note: all mxCells are siblings):
<root>
  <mxCell id="0"/>
  <mxCell id="1" parent="0"/>
  <mxCell id="lane1" value="Frontend" style="swimlane;" vertex="1" parent="1">
    <mxGeometry x="40" y="40" width="200" height="200" as="geometry"/>
  </mxCell>
  <mxCell id="step1" value="Step 1" style="rounded=1;" vertex="1" parent="lane1">
    <mxGeometry x="20" y="60" width="160" height="40" as="geometry"/>
  </mxCell>
  <mxCell id="lane2" value="Backend" style="swimlane;" vertex="1" parent="1">
    <mxGeometry x="280" y="40" width="200" height="200" as="geometry"/>
  </mxCell>
  <mxCell id="step2" value="Step 2" style="rounded=1;" vertex="1" parent="lane2">
    <mxGeometry x="20" y="60" width="160" height="40" as="geometry"/>
  </mxCell>
  <mxCell id="edge1" style="edgeStyle=orthogonalEdgeStyle;endArrow=classic;" edge="1" parent="1" source="step1" target="step2">
    <mxGeometry relative="1" as="geometry"/>
  </mxCell>
</root>

Notes:
- For AWS diagrams, use **AWS 2025 icons**.
- For animated connectors, add "flowAnimation=1" to edge style.
`;

/** Description of the `edit_diagram` tool sent to the model. */
const EDIT_DIAGRAM_DESCRIPTION = `Edit specific parts of the current diagram by replacing exact line matches. Use this tool to make targeted fixes without regenerating the entire XML.
CRITICAL: Copy-paste the EXACT search pattern from the "Current diagram XML" in system context. Do NOT reorder attributes or reformat - the attribute order in draw.io XML varies and you MUST match it exactly.
IMPORTANT: Keep edits concise:
- COPY the exact mxCell line from the current XML (attribute order matters!)
- Only include the lines that are changing, plus 1-2 surrounding lines for context if needed
- Break large changes into multiple smaller edits
- Each search must contain complete lines (never truncate mid-line)
- First match only - be specific enough to target the right element`;

/** Builds a fresh Bedrock cache-point marker for a message's providerOptions. */
function bedrockCachePoint() {
  return { bedrock: { cachePoint: { type: 'default' } } };
}

/**
 * Reports whether the diagram holds nothing beyond the two structural cells
 * (ids "0" and "1").
 *
 * A diagram counts as non-minimal when it contains `id="2"` anywhere
 * (whitespace-insensitive), or when any cell element carries an id other than
 * "0"/"1". The second rule matters because real diagrams frequently use
 * non-numeric ids (e.g. "lane1"), and treating those as empty would let a
 * cached response replace the user's existing diagram.
 *
 * @param xml - Diagram XML as sent by the client.
 * @returns `true` when no user-created cell could be found.
 */
function isMinimalDiagram(xml: string): boolean {
  if (xml.replace(/\s/g, '').includes('id="2"')) {
    return false;
  }
  for (const match of xml.matchAll(CELL_ID_PATTERN)) {
    const cellId = match[2];
    if (cellId !== '0' && cellId !== '1') {
      return false;
    }
  }
  return true;
}

/**
 * Wraps pre-computed diagram XML in a UI message stream that looks like a
 * `display_diagram` tool call, so the client handles it like a model answer.
 *
 * @param xml - Diagram XML to deliver as the tool input.
 * @returns A streaming response carrying a single synthetic tool call.
 */
function createCachedStreamResponse(xml: string): Response {
  const toolCallId = `cached-${Date.now()}`;
  const toolName = 'display_diagram';

  const stream = createUIMessageStream({
    execute: async ({ writer }) => {
      writer.write({ type: 'start' });
      writer.write({ type: 'tool-input-start', toolCallId, toolName });
      writer.write({ type: 'tool-input-delta', toolCallId, inputTextDelta: xml });
      writer.write({ type: 'tool-input-available', toolCallId, toolName, input: { xml } });
      writer.write({ type: 'finish' });
    },
  });

  return createUIMessageStreamResponse({ stream });
}

/**
 * Reads and validates the JSON request body.
 *
 * @returns The validated body, or `null` when the payload is not valid JSON
 *          or does not match `chatRequestSchema`.
 */
async function parseChatRequest(req: Request): Promise<ChatRequestBody | null> {
  let payload: unknown;
  try {
    payload = await req.json();
  } catch {
    return null;
  }
  const parsed = chatRequestSchema.safeParse(payload);
  return parsed.success ? parsed.data : null;
}

/**
 * Converts the UI conversation into model messages, rewrites the trailing
 * user message, and marks the most recent assistant message as a cache point.
 *
 * @param messages  - UI messages from the client (non-empty).
 * @param userText  - Text of the latest user message.
 * @param fileParts - File parts (images) attached to the latest user message.
 */
function buildConversationMessages(
  messages: ChatRequestBody['messages'],
  userText: string,
  fileParts: FilePartLike[],
): ModelMessages {
  // Only messages with a non-empty content array are kept; the original note
  // says the Bedrock API rejects empty content arrays. Messages whose content
  // is a plain string are dropped as well, exactly as before.
  let conversation: ModelMessages = convertToModelMessages(messages).filter(
    (message) => Array.isArray(message.content) && message.content.length > 0,
  );

  const trailing = conversation[conversation.length - 1];
  if (trailing !== undefined && trailing.role === 'user') {
    // The diagram XML travels in its own system message, so the user turn
    // carries only the fenced user text plus any attached images.
    const content = [
      { type: 'text' as const, text: `User input:\n"""md\n${userText}\n"""` },
      ...fileParts.map((part) => ({
        type: 'image' as const,
        image: part.url,
        mediaType: part.mediaType,
      })),
    ];
    conversation = [...conversation.slice(0, -1), { ...trailing, content }];
  }

  // Cache the conversation prefix: tag the last assistant message that
  // precedes the current (final) message. Only one message is tagged.
  for (let i = conversation.length - 2; i >= 0; i--) {
    if (conversation[i].role === 'assistant') {
      conversation[i] = { ...conversation[i], providerOptions: bedrockCachePoint() };
      break;
    }
  }

  return conversation;
}

/**
 * Builds the two system messages, each carrying its own cache breakpoint:
 * the static instructions first, then the current diagram XML. When only the
 * XML changes, the instruction prefix can still be reused from cache.
 */
function buildSystemMessages(instructions: string, xml: string) {
  return [
    {
      role: 'system' as const,
      content: instructions,
      providerOptions: bedrockCachePoint(),
    },
    {
      role: 'system' as const,
      content: `Current diagram XML:\n"""xml\n${xml}\n"""\nWhen using edit_diagram, COPY search patterns exactly from this XML - attribute order matters!`,
      providerOptions: bedrockCachePoint(),
    },
  ];
}

/**
 * Turns a streaming error into a message for the client.
 *
 * Never throws: values that cannot be JSON-serialised (circular structures,
 * functions, symbols) fall back to `String(error)`.
 */
function describeStreamError(error: unknown): string {
  if (error == null) {
    return 'unknown error';
  }

  let errorString: string;
  if (typeof error === 'string') {
    errorString = error;
  } else if (error instanceof Error) {
    errorString = error.message;
  } else {
    try {
      // JSON.stringify yields undefined for some inputs despite its typing.
      errorString = JSON.stringify(error) ?? String(error);
    } catch {
      errorString = String(error);
    }
  }

  // Check for image not supported error (e.g., DeepSeek models)
  const imageUnsupported =
    errorString.includes('image_url') ||
    errorString.includes('unknown variant') ||
    (errorString.includes('image') && errorString.includes('not supported'));

  return imageUnsupported
    ? 'This model does not support image inputs. Please remove the image and try again, or switch to a vision-capable model.'
    : errorString;
}

/**
 * Handles one chat request: serves a cached diagram when the first message on
 * an empty canvas matches a known prompt, otherwise streams a model response
 * with the `display_diagram` / `edit_diagram` tools available.
 *
 * @returns A streaming response, or a 400 JSON response for an invalid body.
 */
async function handleChatRequest(req: Request): Promise<Response> {
  const body = await parseChatRequest(req);
  if (body === null) {
    return Response.json({ error: 'Invalid request body' }, { status: 400 });
  }

  const { messages } = body;
  const xml = body.xml ?? '';

  const lastMessage = messages[messages.length - 1];
  const lastMessageText: string =
    lastMessage.parts?.find((part: { type?: unknown }) => part.type === 'text')?.text || '';
  const fileParts: FilePartLike[] =
    lastMessage.parts?.filter((part: { type?: unknown }) => part.type === 'file') ?? [];

  // === CACHE CHECK ===
  // Runs before the model is resolved, so cached answers never touch the
  // provider configuration.
  const isFirstMessage = messages.length === 1;
  const isEmptyDiagram = xml.trim() === '' || isMinimalDiagram(xml);
  if (isFirstMessage && isEmptyDiagram) {
    const cached = findCachedResponse(lastMessageText, fileParts.length > 0);
    if (cached) {
      console.log('[Cache] Returning cached response for:', lastMessageText);
      return createCachedStreamResponse(cached.xml);
    }
  }

  // Get AI model from environment configuration
  const { model, providerOptions, headers, modelId } = getAIModel();

  // Get the appropriate system prompt based on model (extended for Opus/Haiku 4.5)
  const systemMessage = getSystemPrompt(modelId);

  const allMessages = [
    ...buildSystemMessages(systemMessage, xml),
    ...buildConversationMessages(messages, lastMessageText, fileParts),
  ];

  const result = streamText({
    model,
    messages: allMessages,
    ...(providerOptions && { providerOptions }),
    ...(headers && { headers }),
    onFinish: ({ usage, providerMetadata }) => {
      console.log('[Cache] Full providerMetadata:', JSON.stringify(providerMetadata, null, 2));
      console.log('[Cache] Usage:', JSON.stringify(usage, null, 2));
    },
    tools: {
      // Client-side tool that will be executed on the client
      display_diagram: {
        description: DISPLAY_DIAGRAM_DESCRIPTION,
        inputSchema: z.object({
          xml: z.string().describe('XML string to be displayed on draw.io'),
        }),
      },
      edit_diagram: {
        description: EDIT_DIAGRAM_DESCRIPTION,
        inputSchema: z.object({
          edits: z
            .array(
              z.object({
                search: z
                  .string()
                  .describe('EXACT lines copied from current XML (preserve attribute order!)'),
                replace: z.string().describe('Replacement lines'),
              }),
            )
            .describe('Array of search/replace pairs to apply sequentially'),
        }),
      },
    },
    temperature: 0,
  });

  return result.toUIMessageStreamResponse({
    onError: describeStreamError,
  });
}

/**
 * POST entry point. Delegates to `handleChatRequest` and converts any
 * unexpected failure into a generic 500 JSON response after logging it.
 */
export async function POST(req: Request) {
  try {
    return await handleChatRequest(req);
  } catch (error) {
    console.error('Error in chat route:', error);
    return Response.json({ error: 'Internal server error' }, { status: 500 });
  }
}