next-ai-draw-io/app/api/chat/route.ts
Dayuan Jiang 3534cb13f7 refactor: extract system prompts and add extended prompt for Opus/Haiku 4.5 (#71)
- Extract system prompts to dedicated lib/system-prompts.ts module
- Add extended system prompt (~4000 tokens) for models with higher cache minimums (Opus 4.5, Haiku 4.5)
- Clean up debug logs while preserving informational and cache-related logs
- Improve code formatting and organization in chat route
2025-12-04 13:26:06 +09:00

import { streamText, convertToModelMessages, createUIMessageStream, createUIMessageStreamResponse } from 'ai';
import { getAIModel } from '@/lib/ai-providers';
import { findCachedResponse } from '@/lib/cached-responses';
import { setTraceInput, setTraceOutput, getTelemetryConfig, wrapWithObserve } from '@/lib/langfuse';
import { getSystemPrompt } from '@/lib/system-prompts';
import { z } from "zod";

// Next.js route segment config: allow streaming responses up to 300 seconds
export const maxDuration = 300;
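
// The client posts JSON of roughly this shape (illustrative; "messages" is
// the AI SDK's UIMessage[] chat history):
//   {
//     "messages":  [ ... ],
//     "xml":       "<mxGraphModel>...</mxGraphModel>",  // current diagram
//     "sessionId": "..."                                // optional, for Langfuse
//   }
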
// Helper function to check if diagram is minimal/empty
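// Heuristic: an empty draw.io document contains only the scaffold cells
// id="0" and id="1" (see the display_diagram rules below), so any real
// content is expected to introduce a cell with id="2".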
function isMinimalDiagram(xml: string): boolean {
    const stripped = xml.replace(/\s/g, '');
    return !stripped.includes('id="2"');
}
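
// On a cache hit, the route replays a pre-recorded display_diagram tool call
// over the AI SDK's UI message stream protocol (start -> tool-input-start ->
// tool-input-delta -> tool-input-available -> finish), so the client renders
// a cached diagram exactly as if the model had streamed it live.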

// Helper function to create cached stream response
function createCachedStreamResponse(xml: string): Response {
    const toolCallId = `cached-${Date.now()}`;
    const stream = createUIMessageStream({
        execute: async ({ writer }) => {
            writer.write({ type: 'start' });
            writer.write({ type: 'tool-input-start', toolCallId, toolName: 'display_diagram' });
            writer.write({ type: 'tool-input-delta', toolCallId, inputTextDelta: xml });
            writer.write({ type: 'tool-input-available', toolCallId, toolName: 'display_diagram', input: { xml } });
            writer.write({ type: 'finish' });
        },
    });
    return createUIMessageStreamResponse({ stream });
}

// Inner handler function
async function handleChatRequest(req: Request): Promise<Response> {
    const { messages, xml, sessionId } = await req.json();

    // Get user IP for Langfuse tracking
    const forwardedFor = req.headers.get('x-forwarded-for');
    const userId = forwardedFor?.split(',')[0]?.trim() || 'anonymous';

    // Validate sessionId for Langfuse (must be string, max 200 chars)
    const validSessionId = sessionId && typeof sessionId === 'string' && sessionId.length <= 200
        ? sessionId
        : undefined;

    // Extract user input text for Langfuse trace
    const currentMessage = messages[messages.length - 1];
    const userInputText = currentMessage?.parts?.find((p: any) => p.type === 'text')?.text || '';

    // Update Langfuse trace with input, session, and user
    setTraceInput({
        input: userInputText,
        sessionId: validSessionId,
        userId: userId,
    });

    // === CACHE CHECK START ===
    const isFirstMessage = messages.length === 1;
    const isEmptyDiagram = !xml || xml.trim() === '' || isMinimalDiagram(xml);

    if (isFirstMessage && isEmptyDiagram) {
        const lastMessage = messages[0];
        const textPart = lastMessage.parts?.find((p: any) => p.type === 'text');
        const filePart = lastMessage.parts?.find((p: any) => p.type === 'file');
        const cached = findCachedResponse(textPart?.text || '', !!filePart);
        if (cached) {
            console.log('[Cache] Returning cached response for:', textPart?.text);
            return createCachedStreamResponse(cached.xml);
        }
    }
    // === CACHE CHECK END ===

    // Get AI model from environment configuration
    const { model, providerOptions, headers, modelId } = getAIModel();

    // Get the appropriate system prompt based on model (extended for Opus/Haiku 4.5)
    const systemMessage = getSystemPrompt(modelId);

    const lastMessage = messages[messages.length - 1];

    // Extract text from the last message parts
    const lastMessageText = lastMessage.parts?.find((part: any) => part.type === 'text')?.text || '';

    // Extract file parts (images) from the last message
    const fileParts = lastMessage.parts?.filter((part: any) => part.type === 'file') || [];

    // User input only - XML is now in a separate cached system message
    const formattedUserInput = `User input:
"""md
${lastMessageText}
"""`;

    // Convert UIMessages to ModelMessages and add system message
    const modelMessages = convertToModelMessages(messages);

    // Filter out messages with empty content arrays (Bedrock API rejects these)
    // This is a safety measure - ideally convertToModelMessages should handle all cases
    let enhancedMessages = modelMessages.filter((msg: any) =>
        msg.content && Array.isArray(msg.content) && msg.content.length > 0
    );

    // Update the last message with user input only (XML moved to separate cached system message)
    if (enhancedMessages.length >= 1) {
        const lastModelMessage = enhancedMessages[enhancedMessages.length - 1];
        if (lastModelMessage.role === 'user') {
            // Build content array with user input text and file parts
            const contentParts: any[] = [
                { type: 'text', text: formattedUserInput }
            ];
            // Add image parts back
            for (const filePart of fileParts) {
                contentParts.push({
                    type: 'image',
                    image: filePart.url,
                    mimeType: filePart.mediaType
                });
            }
            enhancedMessages = [
                ...enhancedMessages.slice(0, -1),
                { ...lastModelMessage, content: contentParts }
            ];
        }
    }
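
    // After this step the last model message looks like (illustrative):
    //   { role: 'user', content: [
    //       { type: 'text', text: 'User input:\n"""md\ndraw a cat\n"""' },
    //       { type: 'image', image: '<file URL or data URI>', mimeType: 'image/png' },
    //   ] }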

    // Add cache point to the last assistant message in conversation history
    // This caches the entire conversation prefix for subsequent requests
    // Strategy: system (cached) + history with last assistant (cached) + new user message
    if (enhancedMessages.length >= 2) {
        // Find the last assistant message (should be second-to-last, before current user message)
        for (let i = enhancedMessages.length - 2; i >= 0; i--) {
            if (enhancedMessages[i].role === 'assistant') {
                enhancedMessages[i] = {
                    ...enhancedMessages[i],
                    providerOptions: {
                        bedrock: { cachePoint: { type: 'default' } },
                    },
                };
                break; // Only cache the last assistant message
            }
        }
    }

    // System messages with multiple cache breakpoints for optimal caching:
    // - Breakpoint 1: Static instructions (~1500 tokens) - rarely changes
    // - Breakpoint 2: Current XML context - changes per diagram, but constant within a conversation turn
    // This allows: if only the user message changes, both system caches are reused;
    //              if the XML changes, the instruction cache is still reused
    const systemMessages = [
        // Cache breakpoint 1: Instructions (rarely change)
        {
            role: 'system' as const,
            content: systemMessage,
            providerOptions: {
                bedrock: { cachePoint: { type: 'default' } },
            },
        },
        // Cache breakpoint 2: Current diagram XML context
        {
            role: 'system' as const,
            content: `Current diagram XML:\n"""xml\n${xml || ''}\n"""\nWhen using edit_diagram, COPY search patterns exactly from this XML - attribute order matters!`,
            providerOptions: {
                bedrock: { cachePoint: { type: 'default' } },
            },
        },
    ];

    const allMessages = [...systemMessages, ...enhancedMessages];
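
    // Resulting request layout (prefixes up to each cache point can be served
    // from Bedrock's prompt cache on later turns):
    //   [system: instructions]          <- cache breakpoint 1
    //   [system: current diagram XML]   <- cache breakpoint 2
    //   [history ... last assistant]    <- cache breakpoint 3 (added above)
    //   [new user message]              <- only uncached suffix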

    // Langfuse telemetry config (returns undefined if not configured)
    const telemetryConfig = getTelemetryConfig({ sessionId: validSessionId, userId });

    const result = streamText({
        model,
        messages: allMessages,
        ...(providerOptions && { providerOptions }),
        ...(headers && { headers }),
        ...(telemetryConfig && { experimental_telemetry: telemetryConfig }),
        onFinish: ({ text, usage, providerMetadata }) => {
            console.log('[Cache] Full providerMetadata:', JSON.stringify(providerMetadata, null, 2));
            console.log('[Cache] Usage:', JSON.stringify(usage, null, 2));
            setTraceOutput(text);
        },
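        // On Bedrock, cache statistics (e.g. cache read/write token counts)
        // surface via providerMetadata, which is what the logs above inspect.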

        tools: {
            // Client-side tool that will be executed on the client
            display_diagram: {
                description: `Display a diagram on draw.io. Pass the XML content inside <root> tags.

VALIDATION RULES (XML will be rejected if violated):
1. All mxCell elements must be DIRECT children of <root> - never nested
2. Every mxCell needs a unique id
3. Every mxCell (except id="0") needs a valid parent attribute
4. Edge source/target must reference existing cell IDs
5. Escape special chars in values: &lt; &gt; &amp; &quot;
6. Always start with: <mxCell id="0"/><mxCell id="1" parent="0"/>

Example with swimlanes and edges (note: all mxCells are siblings):
<root>
  <mxCell id="0"/>
  <mxCell id="1" parent="0"/>
  <mxCell id="lane1" value="Frontend" style="swimlane;" vertex="1" parent="1">
    <mxGeometry x="40" y="40" width="200" height="200" as="geometry"/>
  </mxCell>
  <mxCell id="step1" value="Step 1" style="rounded=1;" vertex="1" parent="lane1">
    <mxGeometry x="20" y="60" width="160" height="40" as="geometry"/>
  </mxCell>
  <mxCell id="lane2" value="Backend" style="swimlane;" vertex="1" parent="1">
    <mxGeometry x="280" y="40" width="200" height="200" as="geometry"/>
  </mxCell>
  <mxCell id="step2" value="Step 2" style="rounded=1;" vertex="1" parent="lane2">
    <mxGeometry x="20" y="60" width="160" height="40" as="geometry"/>
  </mxCell>
  <mxCell id="edge1" style="edgeStyle=orthogonalEdgeStyle;endArrow=classic;" edge="1" parent="1" source="step1" target="step2">
    <mxGeometry relative="1" as="geometry"/>
  </mxCell>
</root>

Notes:
- For AWS diagrams, use **AWS 2025 icons**.
- For animated connectors, add "flowAnimation=1" to edge style.
`,
                inputSchema: z.object({
                    xml: z.string().describe("XML string to be displayed on draw.io")
                })
            },
            edit_diagram: {
                description: `Edit specific parts of the current diagram by replacing exact line matches. Use this tool to make targeted fixes without regenerating the entire XML.

CRITICAL: Copy-paste the EXACT search pattern from the "Current diagram XML" in system context. Do NOT reorder attributes or reformat - the attribute order in draw.io XML varies and you MUST match it exactly.

IMPORTANT: Keep edits concise:
- COPY the exact mxCell line from the current XML (attribute order matters!)
- Only include the lines that are changing, plus 1-2 surrounding lines for context if needed
- Break large changes into multiple smaller edits
- Each search must contain complete lines (never truncate mid-line)
- First match only - be specific enough to target the right element`,
                inputSchema: z.object({
                    edits: z.array(z.object({
                        search: z.string().describe("EXACT lines copied from current XML (preserve attribute order!)"),
                        replace: z.string().describe("Replacement lines")
                    })).describe("Array of search/replace pairs to apply sequentially")
                })
            },
        },
        temperature: 0,
    });
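
    // Illustrative edit_diagram payload the model might emit:
    //   { edits: [{
    //       search:  '<mxCell id="step1" value="Step 1" style="rounded=1;" vertex="1" parent="lane1">',
    //       replace: '<mxCell id="step1" value="Validate input" style="rounded=1;" vertex="1" parent="lane1">'
    //   }] }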

    // Error handler function to provide detailed error messages
    function errorHandler(error: unknown) {
        if (error == null) {
            return 'unknown error';
        }
        const errorString = typeof error === 'string'
            ? error
            : error instanceof Error
                ? error.message
                : JSON.stringify(error);

        // Check for image not supported error (e.g., DeepSeek models)
        if (errorString.includes('image_url') ||
            errorString.includes('unknown variant') ||
            (errorString.includes('image') && errorString.includes('not supported'))) {
            return 'This model does not support image inputs. Please remove the image and try again, or switch to a vision-capable model.';
        }
        return errorString;
    }

    return result.toUIMessageStreamResponse({
        onError: errorHandler,
    });
}

// Wrap handler with error handling
async function safeHandler(req: Request): Promise<Response> {
    try {
        return await handleChatRequest(req);
    } catch (error) {
        console.error('Error in chat route:', error);
        return Response.json({ error: 'Internal server error' }, { status: 500 });
    }
}

// Wrap with Langfuse observe (if configured)
const observedHandler = wrapWithObserve(safeHandler);

export async function POST(req: Request) {
    return observedHandler(req);
}
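
// Illustrative client-side call (the UI presumably posts via the AI SDK's
// useChat hook; a raw fetch equivalent would look like):
//   const res = await fetch('/api/chat', {
//       method: 'POST',
//       headers: { 'Content-Type': 'application/json' },
//       body: JSON.stringify({ messages, xml, sessionId }),
//   });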