2025-12-01 14:07:50 +09:00
import { streamText , convertToModelMessages , createUIMessageStream , createUIMessageStreamResponse } from 'ai' ;
2025-11-15 13:36:42 +09:00
import { getAIModel } from '@/lib/ai-providers' ;
2025-12-01 14:07:50 +09:00
import { findCachedResponse } from '@/lib/cached-responses' ;
2025-12-05 21:15:02 +09:00
import { setTraceInput , setTraceOutput , getTelemetryConfig , wrapWithObserve } from '@/lib/langfuse' ;
2025-12-04 13:26:06 +09:00
import { getSystemPrompt } from '@/lib/system-prompts' ;
2025-11-15 13:36:42 +09:00
import { z } from "zod" ;
2025-04-04 02:10:24 +00:00
2025-12-01 00:46:40 +09:00
// NOTE(review): presumably a route-level execution time limit in seconds that the
// hosting framework reads from this export - confirm against the framework/platform docs.
export const maxDuration = 300 ;
2025-03-19 11:03:37 +00:00
2025-12-05 19:30:50 +09:00
// File upload limits (must match client-side)
const MAX_FILE_SIZE = 2 * 1024 * 1024; // 2MB
const MAX_FILES = 5;

/**
 * Returns the exact number of bytes a base64 string decodes to.
 *
 * Every 4 base64 characters encode 3 bytes; each trailing `=` padding
 * character stands for one byte that is NOT present in the decoded data,
 * so it has to be subtracted. Unpadded input is handled by the floor.
 *
 * @param base64Data - The base64 payload (without the `data:...;base64,` prefix).
 * @returns The decoded size in bytes.
 */
function decodedBase64Size(base64Data: string): number {
  let padding = 0;
  if (base64Data.endsWith('==')) {
    padding = 2;
  } else if (base64Data.endsWith('=')) {
    padding = 1;
  }
  return Math.floor((base64Data.length * 3) / 4) - padding;
}

/**
 * Validates the file attachments of the most recent message.
 *
 * Only the last entry of `messages` is inspected. File parts are the entries
 * of its `parts` array whose `type` is `'file'`. Validation fails when there
 * are more than `MAX_FILES` file parts, or when any file part carried as a
 * `data:` URL is larger than `MAX_FILE_SIZE` bytes. File parts whose `url`
 * is not a `data:` URL are not size-checked.
 *
 * The input comes straight from the request body, so malformed shapes
 * (non-array `messages`/`parts`, non-string `url`) are treated as
 * "no file parts" instead of throwing.
 *
 * @param messages - The chat messages from the request body.
 * @returns `{ valid: true }` on success, otherwise `{ valid: false, error }`
 *          with a message suitable for returning to the client.
 */
function validateFileParts(messages: any[]): { valid: boolean; error?: string } {
  const lastMessage = Array.isArray(messages) ? messages[messages.length - 1] : undefined;
  const parts: unknown = lastMessage?.parts;
  const fileParts: any[] = Array.isArray(parts)
    ? parts.filter((p: any) => p?.type === 'file')
    : [];

  if (fileParts.length > MAX_FILES) {
    return { valid: false, error: `Too many files. Maximum ${MAX_FILES} allowed.` };
  }

  for (const filePart of fileParts) {
    // Data URLs format: data:image/png;base64,<data>
    const url: unknown = filePart.url;
    if (typeof url !== 'string' || !url.startsWith('data:')) {
      continue;
    }

    // Split on the FIRST comma only: everything after it is payload. Splitting on
    // every comma would measure just the first fragment of a non-base64 payload.
    const commaIndex = url.indexOf(',');
    if (commaIndex === -1) {
      continue;
    }
    const header = url.slice(0, commaIndex);
    const payload = url.slice(commaIndex + 1);

    // Base64 inflates size by ~33%, so compare the decoded size for base64 payloads.
    // For non-base64 (percent-encoded) payloads the encoded length is an upper bound.
    const sizeInBytes = /;base64$/i.test(header) ? decodedBase64Size(payload) : payload.length;

    if (sizeInBytes > MAX_FILE_SIZE) {
      return { valid: false, error: `File exceeds ${MAX_FILE_SIZE / 1024 / 1024}MB limit.` };
    }
  }

  return { valid: true };
}
2025-12-01 14:07:50 +09:00
/**
 * Reports whether a diagram XML string counts as minimal/empty.
 *
 * All whitespace is removed from the XML first, then the result is searched
 * for the literal text `id="2"`. The diagram is considered minimal when that
 * text is absent.
 *
 * @param xml - The diagram XML to inspect.
 * @returns `true` when no `id="2"` occurs in the whitespace-free XML.
 */
function isMinimalDiagram(xml: string): boolean {
  const MARKER = 'id="2"';
  const compact = xml.split(/\s/).join('');
  return compact.indexOf(MARKER) === -1;
}
/**
 * Builds a streaming response that replays a cached diagram.
 *
 * The stream emits, in order: a `start` chunk, then `tool-input-start`,
 * `tool-input-delta` (carrying the whole XML in one delta) and
 * `tool-input-available` chunks for a `display_diagram` tool call, then a
 * `finish` chunk. The tool call id is `cached-` followed by the current
 * timestamp in milliseconds.
 *
 * @param xml - The cached diagram XML to deliver as the tool input.
 * @returns The response produced by `createUIMessageStreamResponse` for that stream.
 */
function createCachedStreamResponse(xml: string): Response {
  const toolName = 'display_diagram';
  const toolCallId = `cached-${Date.now()}`;

  const stream = createUIMessageStream({
    async execute({ writer }) {
      writer.write({ type: 'start' });
      writer.write({ type: 'tool-input-start', toolCallId, toolName });
      writer.write({ type: 'tool-input-delta', toolCallId, inputTextDelta: xml });
      writer.write({ type: 'tool-input-available', toolCallId, toolName, input: { xml } });
      writer.write({ type: 'finish' });
    },
  });

  return createUIMessageStreamResponse({ stream });
}
2025-12-04 11:24:26 +09:00
// Inner handler function

// Description sent to the model for the client-side `display_diagram` tool.
const DISPLAY_DIAGRAM_TOOL_DESCRIPTION = `Display a diagram on draw.io. Pass the XML content inside <root> tags.

VALIDATION RULES (XML will be rejected if violated):
1. All mxCell elements must be DIRECT children of <root> - never nested
2. Every mxCell needs a unique id
3. Every mxCell (except id="0") needs a valid parent attribute
4. Edge source/target must reference existing cell IDs
5. Escape special chars in values: &lt; &gt; &amp; &quot;
6. Always start with: <mxCell id="0"/><mxCell id="1" parent="0"/>

Example with swimlanes and edges (note: all mxCells are siblings):

<root>
  <mxCell id="0"/>
  <mxCell id="1" parent="0"/>
  <mxCell id="lane1" value="Frontend" style="swimlane;" vertex="1" parent="1">
    <mxGeometry x="40" y="40" width="200" height="200" as="geometry"/>
  </mxCell>
  <mxCell id="step1" value="Step 1" style="rounded=1;" vertex="1" parent="lane1">
    <mxGeometry x="20" y="60" width="160" height="40" as="geometry"/>
  </mxCell>
  <mxCell id="lane2" value="Backend" style="swimlane;" vertex="1" parent="1">
    <mxGeometry x="280" y="40" width="200" height="200" as="geometry"/>
  </mxCell>
  <mxCell id="step2" value="Step 2" style="rounded=1;" vertex="1" parent="lane2">
    <mxGeometry x="20" y="60" width="160" height="40" as="geometry"/>
  </mxCell>
  <mxCell id="edge1" style="edgeStyle=orthogonalEdgeStyle;endArrow=classic;" edge="1" parent="1" source="step1" target="step2">
    <mxGeometry relative="1" as="geometry"/>
  </mxCell>
</root>

Notes:
- For AWS diagrams, use **AWS 2025 icons**.
- For animated connectors, add "flowAnimation=1" to edge style.
`;

// Description sent to the model for the client-side `edit_diagram` tool.
const EDIT_DIAGRAM_TOOL_DESCRIPTION = `Edit specific parts of the current diagram by replacing exact line matches. Use this tool to make targeted fixes without regenerating the entire XML.

WHEN TO USE:
- Changing text labels or values
- Modifying colors, styles, or visual properties
- Adding or removing individual elements (1-3 elements)
- Repositioning specific elements
- Any small, targeted modification

WHEN TO USE display_diagram INSTEAD:
- Creating a new diagram from scratch
- Major restructuring (reorganizing layout, changing diagram type)
- Adding many new elements (more than 3)
- After 3 failed edit_diagram attempts

CRITICAL RULES:
1. Copy-paste the EXACT search pattern from the "Current diagram XML" in system context
2. Do NOT reorder attributes - attribute order in draw.io XML varies, you MUST match exactly
3. Always include the element's id attribute for unique targeting
4. Include complete lines (never truncate mid-line)
5. For multiple changes, use separate edits in the array

ERROR RECOVERY:
- If pattern not found, check attribute order matches current XML exactly
- Retry up to 3 times with adjusted patterns
- After 3 failures, use display_diagram instead`;

/**
 * Enforces the optional access-code gate.
 *
 * Access codes are read from the comma-separated `ACCESS_CODE_LIST`
 * environment variable (entries are trimmed, empty entries ignored). When at
 * least one code is configured, the request must carry one of them in the
 * `x-access-code` header.
 *
 * @param req - The incoming request.
 * @returns A 401 JSON response when the request must be rejected, otherwise `undefined`.
 */
function rejectIfAccessDenied(req: Request): Response | undefined {
  const accessCodes =
    process.env.ACCESS_CODE_LIST?.split(',')
      .map((code) => code.trim())
      .filter(Boolean) ?? [];
  if (accessCodes.length === 0) {
    return undefined;
  }

  const accessCodeHeader = req.headers.get('x-access-code');
  if (accessCodeHeader && accessCodes.includes(accessCodeHeader)) {
    return undefined;
  }

  return Response.json(
    { error: 'Invalid or missing access code. Please configure it in Settings.' },
    { status: 401 }
  );
}

/**
 * Handles one chat request end to end.
 *
 * Steps, in order:
 *  1. Access-code check (401 on failure).
 *  2. Body parsing and shape validation (400 on invalid JSON or when
 *     `messages` is not a non-empty array).
 *  3. Langfuse trace input (text of the last message, session id, user id
 *     taken from the first `x-forwarded-for` entry or `'anonymous'`).
 *  4. File validation via `validateFileParts` (400 on failure).
 *  5. Cache lookup: only for the first message on an empty/minimal diagram;
 *     a hit is answered with `createCachedStreamResponse`.
 *  6. Otherwise the conversation is sent to the configured model with
 *     `streamText`, exposing the `display_diagram` and `edit_diagram` tools,
 *     and the result is returned as a UI message stream response.
 *
 * Errors thrown by the helpers or the model call are not caught here; the
 * caller is expected to handle them.
 *
 * @param req - The incoming POST request whose JSON body carries `messages`,
 *              and optionally `xml` (current diagram) and `sessionId`.
 * @returns The HTTP response (JSON error or streamed model output).
 */
async function handleChatRequest(req: Request): Promise<Response> {
  // Check for access code
  const denied = rejectIfAccessDenied(req);
  if (denied) {
    return denied;
  }

  // Parse the body defensively: a malformed request is a client error (400),
  // not an internal failure.
  let body: unknown;
  try {
    body = await req.json();
  } catch {
    return Response.json({ error: 'Request body must be valid JSON.' }, { status: 400 });
  }
  const payload = (typeof body === 'object' && body !== null ? body : {}) as {
    messages?: unknown;
    xml?: unknown;
    sessionId?: unknown;
  };
  if (!Array.isArray(payload.messages) || payload.messages.length === 0) {
    return Response.json(
      { error: 'Request must include a non-empty "messages" array.' },
      { status: 400 }
    );
  }
  const messages: any[] = payload.messages;
  // A missing or non-string diagram is treated as "no diagram yet".
  const xml = typeof payload.xml === 'string' ? payload.xml : '';
  const { sessionId } = payload;

  // The last message is the one being answered; pull its text and file parts once.
  const lastMessage = messages[messages.length - 1];
  const lastParts: any[] = Array.isArray(lastMessage?.parts) ? lastMessage.parts : [];
  const lastMessageText: string = lastParts.find((part) => part?.type === 'text')?.text || '';
  const fileParts = lastParts.filter((part) => part?.type === 'file');

  // Get user IP for Langfuse tracking
  const forwardedFor = req.headers.get('x-forwarded-for');
  const userId = forwardedFor?.split(',')[0]?.trim() || 'anonymous';

  // Validate sessionId for Langfuse (must be string, max 200 chars)
  const validSessionId =
    typeof sessionId === 'string' && sessionId.length > 0 && sessionId.length <= 200
      ? sessionId
      : undefined;

  // Update Langfuse trace with input, session, and user
  setTraceInput({
    input: lastMessageText,
    sessionId: validSessionId,
    userId,
  });

  // === FILE VALIDATION START ===
  const fileValidation = validateFileParts(messages);
  if (!fileValidation.valid) {
    return Response.json({ error: fileValidation.error }, { status: 400 });
  }
  // === FILE VALIDATION END ===

  // === CACHE CHECK START ===
  const isFirstMessage = messages.length === 1;
  const isEmptyDiagram = xml.trim() === '' || isMinimalDiagram(xml);

  if (isFirstMessage && isEmptyDiagram) {
    // With a single message, the last message is also the first one.
    const cached = findCachedResponse(lastMessageText, fileParts.length > 0);
    if (cached) {
      console.log('[Cache] Returning cached response for:', lastMessageText);
      return createCachedStreamResponse(cached.xml);
    }
  }
  // === CACHE CHECK END ===

  // Get AI model from environment configuration
  const { model, providerOptions, headers, modelId } = getAIModel();

  // Get the appropriate system prompt based on model (extended for Opus/Haiku 4.5)
  const systemMessage = getSystemPrompt(modelId);

  // User input only - XML is now in a separate cached system message
  const formattedUserInput = `User input:
"""md
${lastMessageText}
"""`;

  // Convert UIMessages to ModelMessages
  const modelMessages = convertToModelMessages(messages);

  // Filter out messages with empty content arrays (Bedrock API rejects these).
  // This is a safety measure - ideally convertToModelMessages should handle all cases.
  // NOTE(review): this predicate also drops messages whose content is a plain string,
  // not just empty arrays - confirm that is intended.
  let enhancedMessages = modelMessages.filter(
    (msg: any) => msg.content && Array.isArray(msg.content) && msg.content.length > 0
  );

  // Update the last message with user input only (XML moved to separate cached system message)
  if (enhancedMessages.length >= 1) {
    const lastModelMessage = enhancedMessages[enhancedMessages.length - 1];
    if (lastModelMessage.role === 'user') {
      // Build content array with user input text and file parts
      const contentParts: any[] = [{ type: 'text', text: formattedUserInput }];

      // Add image parts back. The AI SDK version used by this file (v5-style APIs such
      // as convertToModelMessages / inputSchema) names this field `mediaType`.
      for (const filePart of fileParts) {
        contentParts.push({
          type: 'image',
          image: filePart.url,
          mediaType: filePart.mediaType,
        });
      }

      enhancedMessages = [
        ...enhancedMessages.slice(0, -1),
        { ...lastModelMessage, content: contentParts },
      ];
    }
  }

  // Add cache point to the last assistant message in conversation history.
  // This caches the entire conversation prefix for subsequent requests.
  // Strategy: system (cached) + history with last assistant (cached) + new user message
  if (enhancedMessages.length >= 2) {
    // Find the last assistant message (should be second-to-last, before current user message)
    for (let i = enhancedMessages.length - 2; i >= 0; i--) {
      if (enhancedMessages[i].role === 'assistant') {
        enhancedMessages[i] = {
          ...enhancedMessages[i],
          providerOptions: {
            bedrock: { cachePoint: { type: 'default' } },
          },
        };
        break; // Only cache the last assistant message
      }
    }
  }

  // System messages with multiple cache breakpoints for optimal caching:
  // - Breakpoint 1: Static instructions (~1500 tokens) - rarely changes
  // - Breakpoint 2: Current XML context - changes per diagram, but constant within a conversation turn
  // This allows: if only user message changes, both system caches are reused
  //              if XML changes, instruction cache is still reused
  const systemMessages = [
    // Cache breakpoint 1: Instructions (rarely change)
    {
      role: 'system' as const,
      content: systemMessage,
      providerOptions: {
        bedrock: { cachePoint: { type: 'default' } },
      },
    },
    // Cache breakpoint 2: Current diagram XML context
    {
      role: 'system' as const,
      content: `Current diagram XML:\n"""xml\n${xml}\n"""\nWhen using edit_diagram, COPY search patterns exactly from this XML - attribute order matters!`,
      providerOptions: {
        bedrock: { cachePoint: { type: 'default' } },
      },
    },
  ];

  const allMessages = [...systemMessages, ...enhancedMessages];

  // Langfuse telemetry config (returns undefined if not configured); computed once.
  const telemetry = getTelemetryConfig({ sessionId: validSessionId, userId });

  const result = streamText({
    model,
    messages: allMessages,
    ...(providerOptions && { providerOptions }),
    ...(headers && { headers }),
    ...(telemetry && { experimental_telemetry: telemetry }),
    onFinish: ({ text, usage, providerMetadata }) => {
      console.log('[Cache] Full providerMetadata:', JSON.stringify(providerMetadata, null, 2));
      console.log('[Cache] Usage:', JSON.stringify(usage, null, 2));
      // Pass usage to Langfuse (Bedrock streaming doesn't auto-report tokens to telemetry)
      // AI SDK uses inputTokens/outputTokens, Langfuse expects promptTokens/completionTokens
      setTraceOutput(text, {
        promptTokens: usage?.inputTokens,
        completionTokens: usage?.outputTokens,
      });
    },
    tools: {
      // Client-side tool that will be executed on the client
      display_diagram: {
        description: DISPLAY_DIAGRAM_TOOL_DESCRIPTION,
        inputSchema: z.object({
          xml: z.string().describe("XML string to be displayed on draw.io"),
        }),
      },
      edit_diagram: {
        description: EDIT_DIAGRAM_TOOL_DESCRIPTION,
        inputSchema: z.object({
          edits: z
            .array(
              z.object({
                search: z
                  .string()
                  .describe("EXACT lines copied from current XML (preserve attribute order!)"),
                replace: z.string().describe("Replacement lines"),
              })
            )
            .describe("Array of search/replace pairs to apply sequentially"),
        }),
      },
    },
    temperature: 0,
  });

  return result.toUIMessageStreamResponse();
}
2025-12-05 21:15:02 +09:00
/**
 * Runs `handleChatRequest` behind a catch-all error boundary.
 *
 * Anything thrown (or rejected) by the inner handler is logged with
 * `console.error` and converted into a generic 500 JSON response, so no
 * error detail is returned to the client.
 *
 * @param req - The incoming request, passed through unchanged.
 * @returns The inner handler's response, or a 500 `{ error: 'Internal server error' }` response.
 */
async function safeHandler(req: Request): Promise<Response> {
  let response: Response;
  try {
    response = await handleChatRequest(req);
  } catch (cause) {
    console.error('Error in chat route:', cause);
    response = Response.json({ error: 'Internal server error' }, { status: 500 });
  }
  return response;
}
2025-12-05 21:15:02 +09:00
// Wrap with Langfuse observe (if configured).
// `wrapWithObserve` is imported from '@/lib/langfuse'; it receives `safeHandler`, which
// already converts thrown errors into a 500 response. The wrapper is created once, at
// module load, and reused for every request.
const observedHandler = wrapWithObserve ( safeHandler ) ;
/**
 * Exported POST entry point for this route.
 *
 * Delegates directly to the wrapped handler and returns whatever it returns.
 *
 * @param req - The incoming request.
 */
export async function POST ( req : Request ) {
return observedHandler ( req ) ;
}