import {
  APICallError,
  convertToModelMessages,
  createUIMessageStream,
  createUIMessageStreamResponse,
  LoadAPIKeyError,
  stepCountIs,
  streamText,
} from "ai"
import { z } from "zod"

import { getAIModel, supportsPromptCaching } from "@/lib/ai-providers"
import { findCachedResponse } from "@/lib/cached-responses"
import {
  getTelemetryConfig,
  setTraceInput,
  setTraceOutput,
  wrapWithObserve,
} from "@/lib/langfuse"
import { getSystemPrompt } from "@/lib/system-prompts"
2025-12-08 10:56:37 +09:00
export const maxDuration = 300
2025-03-19 11:03:37 +00:00
2025-12-05 19:30:50 +09:00
// File upload limits (must match client-side)
2025-12-06 12:46:40 +09:00
const MAX_FILE_SIZE = 2 * 1024 * 1024 // 2MB
const MAX_FILES = 5
2025-12-05 19:30:50 +09:00
// Helper function to validate file parts in messages
2025-12-06 12:46:40 +09:00
function validateFileParts ( messages : any [ ] ) : {
valid : boolean
error? : string
} {
const lastMessage = messages [ messages . length - 1 ]
const fileParts =
lastMessage ? . parts ? . filter ( ( p : any ) = > p . type === "file" ) || [ ]
if ( fileParts . length > MAX_FILES ) {
return {
valid : false ,
error : ` Too many files. Maximum ${ MAX_FILES } allowed. ` ,
2025-12-05 19:30:50 +09:00
}
}
2025-12-06 12:46:40 +09:00
for ( const filePart of fileParts ) {
// Data URLs format: data:image/png;base64,<data>
// Base64 increases size by ~33%, so we check the decoded size
2025-12-06 16:18:26 +09:00
if ( filePart . url ? . startsWith ( "data:" ) ) {
2025-12-06 12:46:40 +09:00
const base64Data = filePart . url . split ( "," ) [ 1 ]
if ( base64Data ) {
const sizeInBytes = Math . ceil ( ( base64Data . length * 3 ) / 4 )
if ( sizeInBytes > MAX_FILE_SIZE ) {
return {
valid : false ,
error : ` File exceeds ${ MAX_FILE_SIZE / 1024 / 1024 } MB limit. ` ,
}
}
}
}
}
return { valid : true }
2025-12-05 19:30:50 +09:00
}
2025-12-01 14:07:50 +09:00
// Helper function to check if diagram is minimal/empty
function isMinimalDiagram ( xml : string ) : boolean {
2025-12-06 12:46:40 +09:00
const stripped = xml . replace ( /\s/g , "" )
return ! stripped . includes ( 'id="2"' )
2025-12-01 14:07:50 +09:00
}
2025-12-07 00:40:13 +09:00
// Helper function to fix tool call inputs for Bedrock API
// Bedrock requires toolUse.input to be a JSON object, not a string
function fixToolCallInputs ( messages : any [ ] ) : any [ ] {
2025-12-07 19:56:31 +09:00
return messages . map ( ( msg ) = > {
2025-12-07 00:40:13 +09:00
if ( msg . role !== "assistant" || ! Array . isArray ( msg . content ) ) {
return msg
}
2025-12-07 19:56:31 +09:00
const fixedContent = msg . content . map ( ( part : any ) = > {
2025-12-07 14:38:15 +09:00
if ( part . type === "tool-call" ) {
if ( typeof part . input === "string" ) {
try {
const parsed = JSON . parse ( part . input )
return { . . . part , input : parsed }
} catch {
// If parsing fails, wrap the string in an object
return { . . . part , input : { rawInput : part.input } }
}
}
// Input is already an object, but verify it's not null/undefined
if ( part . input === null || part . input === undefined ) {
return { . . . part , input : { } }
2025-12-07 00:40:13 +09:00
}
}
return part
} )
return { . . . msg , content : fixedContent }
} )
}
2025-12-01 14:07:50 +09:00
// Helper function to create cached stream response
function createCachedStreamResponse ( xml : string ) : Response {
2025-12-06 12:46:40 +09:00
const toolCallId = ` cached- ${ Date . now ( ) } `
const stream = createUIMessageStream ( {
execute : async ( { writer } ) = > {
writer . write ( { type : "start" } )
writer . write ( {
type : "tool-input-start" ,
toolCallId ,
toolName : "display_diagram" ,
} )
writer . write ( {
type : "tool-input-delta" ,
toolCallId ,
inputTextDelta : xml ,
} )
writer . write ( {
type : "tool-input-available" ,
toolCallId ,
toolName : "display_diagram" ,
input : { xml } ,
} )
writer . write ( { type : "finish" } )
} ,
} )
return createUIMessageStreamResponse ( { stream } )
2025-12-01 14:07:50 +09:00
}
2025-12-04 11:24:26 +09:00
// Inner handler function
async function handleChatRequest ( req : Request ) : Promise < Response > {
2025-12-06 12:46:40 +09:00
// Check for access code
const accessCodes =
process . env . ACCESS_CODE_LIST ? . split ( "," )
. map ( ( code ) = > code . trim ( ) )
. filter ( Boolean ) || [ ]
if ( accessCodes . length > 0 ) {
const accessCodeHeader = req . headers . get ( "x-access-code" )
if ( ! accessCodeHeader || ! accessCodes . includes ( accessCodeHeader ) ) {
return Response . json (
{
error : "Invalid or missing access code. Please configure it in Settings." ,
} ,
{ status : 401 } ,
)
}
2025-12-05 21:09:34 +08:00
}
2025-12-06 12:46:40 +09:00
const { messages , xml , sessionId } = await req . json ( )
// Get user IP for Langfuse tracking
const forwardedFor = req . headers . get ( "x-forwarded-for" )
const userId = forwardedFor ? . split ( "," ) [ 0 ] ? . trim ( ) || "anonymous"
// Validate sessionId for Langfuse (must be string, max 200 chars)
const validSessionId =
sessionId && typeof sessionId === "string" && sessionId . length <= 200
? sessionId
: undefined
// Extract user input text for Langfuse trace
const currentMessage = messages [ messages . length - 1 ]
const userInputText =
currentMessage ? . parts ? . find ( ( p : any ) = > p . type === "text" ) ? . text || ""
// Update Langfuse trace with input, session, and user
setTraceInput ( {
input : userInputText ,
sessionId : validSessionId ,
userId : userId ,
} )
// === FILE VALIDATION START ===
const fileValidation = validateFileParts ( messages )
if ( ! fileValidation . valid ) {
return Response . json ( { error : fileValidation.error } , { status : 400 } )
2025-12-01 14:07:50 +09:00
}
2025-12-06 12:46:40 +09:00
// === FILE VALIDATION END ===
// === CACHE CHECK START ===
const isFirstMessage = messages . length === 1
const isEmptyDiagram = ! xml || xml . trim ( ) === "" || isMinimalDiagram ( xml )
2025-12-04 13:26:06 +09:00
2025-12-07 19:36:09 +09:00
// DEBUG: Log cache check conditions
console . log ( "[Cache DEBUG] messages.length:" , messages . length )
console . log ( "[Cache DEBUG] isFirstMessage:" , isFirstMessage )
console . log ( "[Cache DEBUG] xml length:" , xml ? . length || 0 )
console . log ( "[Cache DEBUG] xml preview:" , xml ? . substring ( 0 , 200 ) )
console . log ( "[Cache DEBUG] isEmptyDiagram:" , isEmptyDiagram )
2025-12-06 12:46:40 +09:00
if ( isFirstMessage && isEmptyDiagram ) {
const lastMessage = messages [ 0 ]
const textPart = lastMessage . parts ? . find ( ( p : any ) = > p . type === "text" )
const filePart = lastMessage . parts ? . find ( ( p : any ) = > p . type === "file" )
2025-12-04 13:26:06 +09:00
2025-12-06 12:46:40 +09:00
const cached = findCachedResponse ( textPart ? . text || "" , ! ! filePart )
2025-12-04 13:26:06 +09:00
2025-12-06 12:46:40 +09:00
if ( cached ) {
return createCachedStreamResponse ( cached . xml )
}
}
// === CACHE CHECK END ===
2025-12-04 13:26:06 +09:00
feat: add bring-your-own-API-key support (#186)
- Add AI provider settings to config panel (provider, model, API key, base URL)
- Support 7 providers: OpenAI, Anthropic, Google, Azure, OpenRouter, DeepSeek, SiliconFlow
- Client API keys stored in localStorage, never stored on server
- Client settings override server env vars when provided
- Skip server credential validation when client provides API key
- Bypass usage limits (request/token/TPM) when using own API key
- Add /api/config endpoint for fetching usage limits
- Add privacy notices to settings dialog, about pages, and quota toast
- Add clear settings button to reset saved API keys
- Update README files (EN/CN/JA) with BYOK documentation
Co-authored-by: dayuan.jiang <jiangdy@amazon.co.jp>
2025-12-09 17:50:07 +09:00
// Read client AI provider overrides from headers
const clientOverrides = {
provider : req.headers.get ( "x-ai-provider" ) ,
baseUrl : req.headers.get ( "x-ai-base-url" ) ,
apiKey : req.headers.get ( "x-ai-api-key" ) ,
modelId : req.headers.get ( "x-ai-model" ) ,
}
// Get AI model with optional client overrides
const { model , providerOptions , headers , modelId } =
getAIModel ( clientOverrides )
2025-12-04 13:26:06 +09:00
2025-12-09 15:53:59 +09:00
// Check if model supports prompt caching
const shouldCache = supportsPromptCaching ( modelId )
console . log (
` [Prompt Caching] ${ shouldCache ? "ENABLED" : "DISABLED" } for model: ${ modelId } ` ,
)
2025-12-06 12:46:40 +09:00
// Get the appropriate system prompt based on model (extended for Opus/Haiku 4.5)
const systemMessage = getSystemPrompt ( modelId )
2025-12-04 13:26:06 +09:00
2025-12-06 12:46:40 +09:00
const lastMessage = messages [ messages . length - 1 ]
// Extract text from the last message parts
const lastMessageText =
lastMessage . parts ? . find ( ( part : any ) = > part . type === "text" ) ? . text || ""
// Extract file parts (images) from the last message
const fileParts =
lastMessage . parts ? . filter ( ( part : any ) = > part . type === "file" ) || [ ]
// User input only - XML is now in a separate cached system message
const formattedUserInput = ` User input:
2025-03-24 02:38:27 +00:00
"" " md
2025-08-31 12:54:14 +09:00
$ { lastMessageText }
2025-12-06 12:46:40 +09:00
"" " `
// Convert UIMessages to ModelMessages and add system message
const modelMessages = convertToModelMessages ( messages )
2025-12-07 00:40:13 +09:00
// Fix tool call inputs for Bedrock API (requires JSON objects, not strings)
const fixedMessages = fixToolCallInputs ( modelMessages )
2025-12-06 12:46:40 +09:00
// Filter out messages with empty content arrays (Bedrock API rejects these)
// This is a safety measure - ideally convertToModelMessages should handle all cases
2025-12-07 00:40:13 +09:00
let enhancedMessages = fixedMessages . filter (
2025-12-06 12:46:40 +09:00
( msg : any ) = >
msg . content && Array . isArray ( msg . content ) && msg . content . length > 0 ,
)
// Update the last message with user input only (XML moved to separate cached system message)
if ( enhancedMessages . length >= 1 ) {
const lastModelMessage = enhancedMessages [ enhancedMessages . length - 1 ]
if ( lastModelMessage . role === "user" ) {
// Build content array with user input text and file parts
const contentParts : any [ ] = [
{ type : "text" , text : formattedUserInput } ,
]
// Add image parts back
for ( const filePart of fileParts ) {
contentParts . push ( {
type : "image" ,
image : filePart.url ,
mimeType : filePart.mediaType ,
} )
}
enhancedMessages = [
. . . enhancedMessages . slice ( 0 , - 1 ) ,
{ . . . lastModelMessage , content : contentParts } ,
]
}
2025-08-31 12:54:14 +09:00
}
2025-12-06 12:46:40 +09:00
// Add cache point to the last assistant message in conversation history
// This caches the entire conversation prefix for subsequent requests
// Strategy: system (cached) + history with last assistant (cached) + new user message
2025-12-09 15:53:59 +09:00
if ( shouldCache && enhancedMessages . length >= 2 ) {
2025-12-06 12:46:40 +09:00
// Find the last assistant message (should be second-to-last, before current user message)
for ( let i = enhancedMessages . length - 2 ; i >= 0 ; i -- ) {
if ( enhancedMessages [ i ] . role === "assistant" ) {
enhancedMessages [ i ] = {
. . . enhancedMessages [ i ] ,
providerOptions : {
bedrock : { cachePoint : { type : "default" } } ,
} ,
}
break // Only cache the last assistant message
}
}
2025-12-01 10:43:33 +09:00
}
2025-12-06 12:46:40 +09:00
// System messages with multiple cache breakpoints for optimal caching:
// - Breakpoint 1: Static instructions (~1500 tokens) - rarely changes
// - Breakpoint 2: Current XML context - changes per diagram, but constant within a conversation turn
// This allows: if only user message changes, both system caches are reused
// if XML changes, instruction cache is still reused
const systemMessages = [
// Cache breakpoint 1: Instructions (rarely change)
{
role : "system" as const ,
content : systemMessage ,
2025-12-09 15:53:59 +09:00
. . . ( shouldCache && {
providerOptions : {
bedrock : { cachePoint : { type : "default" } } ,
} ,
} ) ,
2025-12-06 12:46:40 +09:00
} ,
// Cache breakpoint 2: Current diagram XML context
{
role : "system" as const ,
content : ` Current diagram XML: \ n"""xml \ n ${ xml || "" } \ n""" \ nWhen using edit_diagram, COPY search patterns exactly from this XML - attribute order matters! ` ,
2025-12-09 15:53:59 +09:00
. . . ( shouldCache && {
providerOptions : {
bedrock : { cachePoint : { type : "default" } } ,
} ,
} ) ,
2025-12-06 12:46:40 +09:00
} ,
]
const allMessages = [ . . . systemMessages , . . . enhancedMessages ]
const result = streamText ( {
model ,
2025-12-07 00:40:13 +09:00
stopWhen : stepCountIs ( 5 ) ,
2025-12-06 12:46:40 +09:00
messages : allMessages ,
. . . ( providerOptions && { providerOptions } ) ,
. . . ( headers && { headers } ) ,
// Langfuse telemetry config (returns undefined if not configured)
. . . ( getTelemetryConfig ( { sessionId : validSessionId , userId } ) && {
experimental_telemetry : getTelemetryConfig ( {
sessionId : validSessionId ,
userId ,
} ) ,
} ) ,
2025-12-07 00:40:13 +09:00
// Repair malformed tool calls (model sometimes generates invalid JSON with unescaped quotes)
experimental_repairToolCall : async ( { toolCall } ) = > {
// The toolCall.input contains the raw JSON string that failed to parse
const rawJson =
typeof toolCall . input === "string" ? toolCall.input : null
if ( rawJson ) {
try {
// Fix unescaped quotes: x="520" should be x=\"520\"
const fixed = rawJson . replace (
/([a-zA-Z])="(\d+)"/g ,
'$1=\\"$2\\"' ,
)
const parsed = JSON . parse ( fixed )
return {
type : "tool-call" as const ,
toolCallId : toolCall.toolCallId ,
toolName : toolCall.toolName ,
input : JSON.stringify ( parsed ) ,
}
} catch {
// Repair failed, return null
}
}
return null
} ,
2025-12-08 18:56:34 +09:00
onFinish : ( { text , usage } ) = > {
2025-12-06 12:46:40 +09:00
// Pass usage to Langfuse (Bedrock streaming doesn't auto-report tokens to telemetry)
setTraceOutput ( text , {
promptTokens : usage?.inputTokens ,
completionTokens : usage?.outputTokens ,
} )
} ,
tools : {
// Client-side tool that will be executed on the client
display_diagram : {
description : ` Display a diagram on draw.io. Pass the XML content inside <root> tags.
2025-12-03 16:14:53 +09:00
VALIDATION RULES ( XML will be rejected if violated ) :
1 . All mxCell elements must be DIRECT children of < root > - never nested
2 . Every mxCell needs a unique id
3 . Every mxCell ( except id = "0" ) needs a valid parent attribute
4 . Edge source / target must reference existing cell IDs
5 . Escape special chars in values : & lt ; & gt ; & amp ; & quot ;
6 . Always start with : < mxCell id = "0" / > < mxCell id = "1" parent = "0" / >
Example with swimlanes and edges ( note : all mxCells are siblings ) :
< root >
< mxCell id = "0" / >
< mxCell id = "1" parent = "0" / >
< mxCell id = "lane1" value = "Frontend" style = "swimlane;" vertex = "1" parent = "1" >
< mxGeometry x = "40" y = "40" width = "200" height = "200" as = "geometry" / >
< / mxCell >
< mxCell id = "step1" value = "Step 1" style = "rounded=1;" vertex = "1" parent = "lane1" >
< mxGeometry x = "20" y = "60" width = "160" height = "40" as = "geometry" / >
< / mxCell >
< mxCell id = "lane2" value = "Backend" style = "swimlane;" vertex = "1" parent = "1" >
< mxGeometry x = "280" y = "40" width = "200" height = "200" as = "geometry" / >
< / mxCell >
< mxCell id = "step2" value = "Step 2" style = "rounded=1;" vertex = "1" parent = "lane2" >
< mxGeometry x = "20" y = "60" width = "160" height = "40" as = "geometry" / >
< / mxCell >
< mxCell id = "edge1" style = "edgeStyle=orthogonalEdgeStyle;endArrow=classic;" edge = "1" parent = "1" source = "step1" target = "step2" >
< mxGeometry relative = "1" as = "geometry" / >
< / mxCell >
< / root >
Notes :
- For AWS diagrams , use * * AWS 2025 icons * * .
- For animated connectors , add "flowAnimation=1" to edge style .
` ,
2025-12-06 12:46:40 +09:00
inputSchema : z.object ( {
xml : z
. string ( )
. describe ( "XML string to be displayed on draw.io" ) ,
} ) ,
} ,
edit_diagram : {
description : ` Edit specific parts of the current diagram by replacing exact line matches. Use this tool to make targeted fixes without regenerating the entire XML.
2025-12-06 12:41:01 +09:00
CRITICAL : Copy - paste the EXACT search pattern from the "Current diagram XML" in system context . Do NOT reorder attributes or reformat - the attribute order in draw . io XML varies and you MUST match it exactly .
IMPORTANT : Keep edits concise :
- COPY the exact mxCell line from the current XML ( attribute order matters ! )
- Only include the lines that are changing , plus 1 - 2 surrounding lines for context if needed
- Break large changes into multiple smaller edits
- Each search must contain complete lines ( never truncate mid - line )
2025-12-07 00:40:13 +09:00
- First match only - be specific enough to target the right element
⚠ ️ JSON ESCAPING : Every " inside string values MUST be escaped as \\" . Example : x = \ \ "100\\" y = \ \ "200\\" - BOTH quotes need backslashes ! ` ,
2025-12-06 12:46:40 +09:00
inputSchema : z.object ( {
edits : z
. array (
z . object ( {
search : z
. string ( )
. describe (
"EXACT lines copied from current XML (preserve attribute order!)" ,
) ,
replace : z
. string ( )
. describe ( "Replacement lines" ) ,
} ) ,
)
. describe (
"Array of search/replace pairs to apply sequentially" ,
) ,
} ) ,
} ,
} ,
2025-12-06 22:04:59 +05:30
. . . ( process . env . TEMPERATURE !== undefined && {
temperature : parseFloat ( process . env . TEMPERATURE ) ,
} ) ,
2025-12-06 12:46:40 +09:00
} )
2025-12-08 18:56:34 +09:00
return result . toUIMessageStreamResponse ( {
messageMetadata : ( { part } ) = > {
if ( part . type === "finish" ) {
const usage = ( part as any ) . totalUsage
if ( ! usage ) {
console . warn (
"[messageMetadata] No usage data in finish part" ,
)
return undefined
}
// Total input = non-cached + cached (these are separate counts)
// Note: cacheWriteInputTokens is not available on finish part
const totalInputTokens =
( usage . inputTokens ? ? 0 ) + ( usage . cachedInputTokens ? ? 0 )
return {
inputTokens : totalInputTokens ,
outputTokens : usage.outputTokens ? ? 0 ,
}
}
return undefined
} ,
} )
2025-12-04 11:24:26 +09:00
}
2025-12-08 19:52:18 +08:00
// Helper to categorize errors and return appropriate response
function handleError ( error : unknown ) : Response {
console . error ( "Error in chat route:" , error )
const isDev = process . env . NODE_ENV === "development"
// Check for specific AI SDK error types
if ( APICallError . isInstance ( error ) ) {
return Response . json (
{
error : error.message ,
. . . ( isDev && {
details : error.responseBody ,
stack : error.stack ,
} ) ,
} ,
{ status : error.statusCode || 500 } ,
)
}
if ( LoadAPIKeyError . isInstance ( error ) ) {
return Response . json (
{
error : "Authentication failed. Please check your API key." ,
. . . ( isDev && {
stack : error.stack ,
} ) ,
} ,
{ status : 401 } ,
)
}
// Fallback for other errors with safety filter
const message =
error instanceof Error ? error . message : "An unexpected error occurred"
const status = ( error as any ) ? . statusCode || ( error as any ) ? . status || 500
// Prevent leaking API keys, tokens, or other sensitive data
const lowerMessage = message . toLowerCase ( )
const safeMessage =
lowerMessage . includes ( "key" ) ||
lowerMessage . includes ( "token" ) ||
lowerMessage . includes ( "sig" ) ||
lowerMessage . includes ( "signature" ) ||
lowerMessage . includes ( "secret" ) ||
lowerMessage . includes ( "password" ) ||
lowerMessage . includes ( "credential" )
? "Authentication failed. Please check your credentials."
: message
return Response . json (
{
error : safeMessage ,
. . . ( isDev && {
details : message ,
stack : error instanceof Error ? error.stack : undefined ,
} ) ,
} ,
{ status } ,
)
}
2025-12-05 21:15:02 +09:00
// Wrap handler with error handling
async function safeHandler ( req : Request ) : Promise < Response > {
2025-12-06 12:46:40 +09:00
try {
return await handleChatRequest ( req )
} catch ( error ) {
2025-12-08 19:52:18 +08:00
return handleError ( error )
2025-12-06 12:46:40 +09:00
}
2025-03-19 08:16:44 +00:00
}
2025-12-05 21:15:02 +09:00
// Wrap with Langfuse observe (if configured)
2025-12-06 12:46:40 +09:00
const observedHandler = wrapWithObserve ( safeHandler )
2025-12-05 21:15:02 +09:00
export async function POST ( req : Request ) {
2025-12-06 12:46:40 +09:00
return observedHandler ( req )
2025-12-05 21:15:02 +09:00
}