chore: upgrade AI SDK to v6.0.1

- Upgrade the `ai` package from ^5.0.89 to ^6.0.1
- Upgrade the `@ai-sdk/*` provider packages to their latest v3/v4 releases
- Await the `convertToModelMessages` call, which is async in the new API
- Replace `usage.cachedInputTokens` with `usage.inputTokenDetails?.cacheReadTokens`
2026-01-05 07:42:28 +08:00 · 2025-12-23 13:13:21 +09:00
12 changed files with 503 additions and 1291 deletions
--- a/app/api/chat/route.ts
+++ b/app/api/chat/route.ts
@@ -14,11 +14,6 @@ import path from "path"
 import { z } from "zod"
 import { getAIModel, supportsPromptCaching } from "@/lib/ai-providers"
 import { findCachedResponse } from "@/lib/cached-responses"
 import {
    checkAndIncrementRequest,
    isQuotaEnabled,
    recordTokenUsage,
 } from "@/lib/dynamo-quota-manager"
 import {
    getTelemetryConfig,
    setTraceInput,
@@ -167,13 +162,9 @@ async function handleChatRequest(req: Request): Promise<Response> {
    const { messages, xml, previousXml, sessionId } = await req.json()
-    // Get user IP for Langfuse tracking (hashed for privacy)
+    // Get user IP for Langfuse tracking
    const forwardedFor = req.headers.get("x-forwarded-for")
-    const rawIp = forwardedFor?.split(",")[0]?.trim() || "anonymous"
+    const userId = forwardedFor?.split(",")[0]?.trim() || "anonymous"
    const userId =
        rawIp === "anonymous"
            ? rawIp
            : `user-${Buffer.from(rawIp).toString("base64url").slice(0, 8)}`
    // Validate sessionId for Langfuse (must be string, max 200 chars)
    const validSessionId =
@@ -182,12 +173,9 @@ async function handleChatRequest(req: Request): Promise<Response> {
            : undefined
    // Extract user input text for Langfuse trace
-    // Find the last USER message, not just the last message (which could be assistant in multi-step tool flows)
+    const lastMessage = messages[messages.length - 1]
    const lastUserMessage = [...messages]
        .reverse()
        .find((m: any) => m.role === "user")
    const userInputText =
-        lastUserMessage?.parts?.find((p: any) => p.type === "text")?.text || ""
+        lastMessage?.parts?.find((p: any) => p.type === "text")?.text || ""
    // Update Langfuse trace with input, session, and user
    setTraceInput({
@@ -196,33 +184,6 @@ async function handleChatRequest(req: Request): Promise<Response> {
        userId: userId,
    })
    // === SERVER-SIDE QUOTA CHECK START ===
    // Quota is opt-in: only enabled when DYNAMODB_QUOTA_TABLE env var is set
    const hasOwnApiKey = !!(
        req.headers.get("x-ai-provider") && req.headers.get("x-ai-api-key")
    )
    // Skip quota check if: quota disabled, user has own API key, or is anonymous
    if (isQuotaEnabled() && !hasOwnApiKey && userId !== "anonymous") {
        const quotaCheck = await checkAndIncrementRequest(userId, {
            requests: Number(process.env.DAILY_REQUEST_LIMIT) || 10,
            tokens: Number(process.env.DAILY_TOKEN_LIMIT) || 200000,
            tpm: Number(process.env.TPM_LIMIT) || 20000,
        })
        if (!quotaCheck.allowed) {
            return Response.json(
                {
                    error: quotaCheck.error,
                    type: quotaCheck.type,
                    used: quotaCheck.used,
                    limit: quotaCheck.limit,
                },
                { status: 429 },
            )
        }
    }
    // === SERVER-SIDE QUOTA CHECK END ===
    // === FILE VALIDATION START ===
    const fileValidation = validateFileParts(messages)
    if (!fileValidation.valid) {
@@ -276,10 +237,9 @@ async function handleChatRequest(req: Request): Promise<Response> {
    // Get the appropriate system prompt based on model (extended for Opus/Haiku 4.5)
    const systemMessage = getSystemPrompt(modelId, minimalStyle)
-    // Extract file parts (images) from the last user message
+    // Extract file parts (images) from the last message
    const fileParts =
-        lastUserMessage?.parts?.filter((part: any) => part.type === "file") ||
+        lastMessage.parts?.filter((part: any) => part.type === "file") || []
        []
    // User input only - XML is now in a separate cached system message
    const formattedUserInput = `User input:
@@ -542,26 +502,12 @@ ${userInputText}
                userId,
            }),
        }),
-        onFinish: ({ text, totalUsage }) => {
+        onFinish: ({ text, usage }) => {
-            // AI SDK 6 telemetry auto-reports token usage on its spans
+            // Pass usage to Langfuse (Bedrock streaming doesn't auto-report tokens to telemetry)
-            setTraceOutput(text)
+            setTraceOutput(text, {
-
+                promptTokens: usage?.inputTokens,
-            // Record token usage for server-side quota tracking (if enabled)
+                completionTokens: usage?.outputTokens,
-            // Use totalUsage (cumulative across all steps) instead of usage (final step only)
+            })
            // Include all 4 token types: input, output, cache read, cache write
            if (
                isQuotaEnabled() &&
                !hasOwnApiKey &&
                userId !== "anonymous" &&
                totalUsage
            ) {
                const totalTokens =
                    (totalUsage.inputTokens || 0) +
                    (totalUsage.outputTokens || 0) +
                    (totalUsage.cachedInputTokens || 0) +
                    (totalUsage.inputTokenDetails?.cacheWriteTokens || 0)
                recordTokenUsage(userId, totalTokens)
            }
        },
        tools: {
            // Client-side tool that will be executed on the client
@@ -731,9 +677,20 @@ Call this tool to get shape names and usage syntax for a specific library.`,
        messageMetadata: ({ part }) => {
            if (part.type === "finish") {
                const usage = (part as any).totalUsage
-                // AI SDK 6 provides totalTokens directly
+                if (!usage) {
                    console.warn(
                        "[messageMetadata] No usage data in finish part",
                    )
                    return undefined
                }
                // Total input = non-cached + cached (these are separate counts)
                // Note: cacheWriteInputTokens is not available on finish part
                const totalInputTokens =
                    (usage.inputTokens ?? 0) +
                    (usage.inputTokenDetails?.cacheReadTokens ?? 0)
                return {
-                    totalTokens: usage?.totalTokens ?? 0,
+                    inputTokens: totalInputTokens,
                    outputTokens: usage.outputTokens ?? 0,
                    finishReason: (part as any).finishReason,
                }
            }
--- a/app/api/log-feedback/route.ts
+++ b/app/api/log-feedback/route.ts
@@ -27,18 +27,9 @@ export async function POST(req: Request) {
    const { messageId, feedback, sessionId } = data
-    // Skip logging if no sessionId - prevents attaching to wrong user's trace
+    // Get user IP for tracking
    if (!sessionId) {
        return Response.json({ success: true, logged: false })
    }
    // Get user IP for tracking (hashed for privacy)
    const forwardedFor = req.headers.get("x-forwarded-for")
-    const rawIp = forwardedFor?.split(",")[0]?.trim() || "anonymous"
+    const userId = forwardedFor?.split(",")[0]?.trim() || "anonymous"
    const userId =
        rawIp === "anonymous"
            ? rawIp
            : `user-${Buffer.from(rawIp).toString("base64url").slice(0, 8)}`
    try {
        // Find the most recent chat trace for this session to attach the score to
--- a/app/api/log-save/route.ts
+++ b/app/api/log-save/route.ts
@@ -27,11 +27,6 @@ export async function POST(req: Request) {
    const { filename, format, sessionId } = data
    // Skip logging if no sessionId - prevents attaching to wrong user's trace
    if (!sessionId) {
        return Response.json({ success: true, logged: false })
    }
    try {
        const timestamp = new Date().toISOString()
--- a/components/chat-message-display.tsx
+++ b/components/chat-message-display.tsx
@@ -31,7 +31,6 @@ import { getApiEndpoint } from "@/lib/base-path"
 import {
    applyDiagramOperations,
    convertToLegalXml,
    extractCompleteMxCells,
    isMxCellXmlComplete,
    replaceNodes,
    validateAndFixXml,
@@ -316,28 +315,12 @@ export function ChatMessageDisplay({
    const handleDisplayChart = useCallback(
        (xml: string, showToast = false) => {
-            let currentXml = xml || ""
+            const currentXml = xml || ""
            const startTime = performance.now()
            // During streaming (showToast=false), extract only complete mxCell elements
            // This allows progressive rendering even with partial/incomplete trailing XML
            if (!showToast) {
                const completeCells = extractCompleteMxCells(currentXml)
                if (!completeCells) {
                    return
                }
                currentXml = completeCells
            }
            const convertedXml = convertToLegalXml(currentXml)
            if (convertedXml !== previousXML.current) {
                // Parse and validate XML BEFORE calling replaceNodes
                const parser = new DOMParser()
-                // Wrap in root element for parsing multiple mxCell elements
+                const testDoc = parser.parseFromString(convertedXml, "text/xml")
                const testDoc = parser.parseFromString(
                    `<root>${convertedXml}</root>`,
                    "text/xml",
                )
                const parseError = testDoc.querySelector("parsererror")
                if (parseError) {
@@ -364,22 +347,7 @@ export function ChatMessageDisplay({
                        `<mxfile><diagram name="Page-1" id="page-1"><mxGraphModel><root><mxCell id="0"/><mxCell id="1" parent="0"/></root></mxGraphModel></diagram></mxfile>`
                    const replacedXML = replaceNodes(baseXML, convertedXml)
-                    const xmlProcessTime = performance.now() - startTime
+                    // Validate and auto-fix the XML
                    // During streaming (showToast=false), skip heavy validation for lower latency
                    // The quick DOM parse check above catches malformed XML
                    // Full validation runs on final output (showToast=true)
                    if (!showToast) {
                        previousXML.current = convertedXml
                        const loadStartTime = performance.now()
                        onDisplayChart(replacedXML, true)
                        console.log(
                            `[Streaming] XML processing: ${xmlProcessTime.toFixed(1)}ms, drawio load: ${(performance.now() - loadStartTime).toFixed(1)}ms`,
                        )
                        return
                    }
                    // Final output: run full validation and auto-fix
                    const validation = validateAndFixXml(replacedXML)
                    if (validation.valid) {
                        previousXML.current = convertedXml
@@ -392,19 +360,18 @@ export function ChatMessageDisplay({
                            )
                        }
                        // Skip validation in loadDiagram since we already validated above
                        const loadStartTime = performance.now()
                        onDisplayChart(xmlToLoad, true)
                        console.log(
                            `[Final] XML processing: ${xmlProcessTime.toFixed(1)}ms, validation+load: ${(performance.now() - loadStartTime).toFixed(1)}ms`,
                        )
                    } else {
                        console.error(
                            "[ChatMessageDisplay] XML validation failed:",
                            validation.error,
                        )
-                        toast.error(
+                        // Only show toast if this is the final XML (not during streaming)
-                            "Diagram validation failed. Please try regenerating.",
+                        if (showToast) {
-                        )
+                            toast.error(
                                "Diagram validation failed. Please try regenerating.",
                            )
                        }
                    }
                } catch (error) {
                    console.error(
@@ -636,10 +603,17 @@ export function ChatMessageDisplay({
            }
        })
-        // NOTE: Don't cleanup debounce timeouts here!
+        // Cleanup: clear any pending debounce timeout on unmount
-        // The cleanup runs on every re-render (when messages changes),
+        return () => {
-        // which would cancel the timeout before it fires.
+            if (debounceTimeoutRef.current) {
-        // Let the timeouts complete naturally - they're harmless if component unmounts.
+                clearTimeout(debounceTimeoutRef.current)
                debounceTimeoutRef.current = null
            }
            if (editDebounceTimeoutRef.current) {
                clearTimeout(editDebounceTimeoutRef.current)
                editDebounceTimeoutRef.current = null
            }
        }
    }, [messages, handleDisplayChart, chartXML])
    const renderToolPart = (part: ToolPartLike) => {
--- a/components/chat-panel.tsx
+++ b/components/chat-panel.tsx
@@ -76,7 +76,6 @@ interface ChatPanelProps {
 const TOOL_ERROR_STATE = "output-error" as const
 const DEBUG = process.env.NODE_ENV === "development"
 const MAX_AUTO_RETRY_COUNT = 1
 const MAX_CONTINUATION_RETRY_COUNT = 2 // Limit for truncation continuation retries
 /**
 * Check if auto-resubmit should happen based on tool errors.
@@ -217,8 +216,6 @@ export default function ChatPanel({
    // Ref to track consecutive auto-retry count (reset on user action)
    const autoRetryCountRef = useRef(0)
    // Ref to track continuation retry count (for truncation handling)
    const continuationRetryCountRef = useRef(0)
    // Ref to accumulate partial XML when output is truncated due to maxOutputTokens
    // When partialXmlRef.current.length > 0, we're in continuation mode
@@ -556,23 +553,6 @@ Continue from EXACTLY where you stopped.`,
            }
        },
        onError: (error) => {
            // Handle server-side quota limit (429 response)
            if (error.message.includes("Daily request limit")) {
                quotaManager.showQuotaLimitToast()
                return
            }
            if (error.message.includes("Daily token limit")) {
                quotaManager.showTokenLimitToast(dailyTokenLimit)
                return
            }
            if (
                error.message.includes("Rate limit exceeded") ||
                error.message.includes("tokens per minute")
            ) {
                quotaManager.showTPMLimitToast()
                return
            }
            // Silence access code error in console since it's handled by UI
            if (!error.message.includes("Invalid or missing access code")) {
                console.error("Chat error:", error)
@@ -649,6 +629,22 @@ Continue from EXACTLY where you stopped.`,
            // DEBUG: Log finish reason to diagnose truncation
            console.log("[onFinish] finishReason:", metadata?.finishReason)
            console.log("[onFinish] metadata:", metadata)
            if (metadata) {
                // Use Number.isFinite to guard against NaN (typeof NaN === 'number' is true)
                const inputTokens = Number.isFinite(metadata.inputTokens)
                    ? (metadata.inputTokens as number)
                    : 0
                const outputTokens = Number.isFinite(metadata.outputTokens)
                    ? (metadata.outputTokens as number)
                    : 0
                const actualTokens = inputTokens + outputTokens
                if (actualTokens > 0) {
                    quotaManager.incrementTokenCount(actualTokens)
                    quotaManager.incrementTPMCount(actualTokens)
                }
            }
        },
        sendAutomaticallyWhen: ({ messages }) => {
            const isInContinuationMode = partialXmlRef.current.length > 0
@@ -660,25 +656,15 @@ Continue from EXACTLY where you stopped.`,
            if (!shouldRetry) {
                // No error, reset retry count and clear state
                autoRetryCountRef.current = 0
                continuationRetryCountRef.current = 0
                partialXmlRef.current = ""
                return false
            }
-            // Continuation mode: limited retries for truncation handling
+            // Continuation mode: unlimited retries (truncation continuation, not real errors)
            // Server limits to 5 steps via stepCountIs(5)
            if (isInContinuationMode) {
-                if (
+                // Don't count against retry limit for continuation
-                    continuationRetryCountRef.current >=
+                // Quota checks still apply below
                    MAX_CONTINUATION_RETRY_COUNT
                ) {
                    toast.error(
                        `Continuation retry limit reached (${MAX_CONTINUATION_RETRY_COUNT}). The diagram may be too complex.`,
                    )
                    continuationRetryCountRef.current = 0
                    partialXmlRef.current = ""
                    return false
                }
                continuationRetryCountRef.current++
            } else {
                // Regular error: check retry count limit
                if (autoRetryCountRef.current >= MAX_AUTO_RETRY_COUNT) {
@@ -693,6 +679,23 @@ Continue from EXACTLY where you stopped.`,
                autoRetryCountRef.current++
            }
            // Check quota limits before auto-retry
            const tokenLimitCheck = quotaManager.checkTokenLimit()
            if (!tokenLimitCheck.allowed) {
                quotaManager.showTokenLimitToast(tokenLimitCheck.used)
                autoRetryCountRef.current = 0
                partialXmlRef.current = ""
                return false
            }
            const tpmCheck = quotaManager.checkTPMLimit()
            if (!tpmCheck.allowed) {
                quotaManager.showTPMLimitToast()
                autoRetryCountRef.current = 0
                partialXmlRef.current = ""
                return false
            }
            return true
        },
    })
@@ -909,6 +912,9 @@ Continue from EXACTLY where you stopped.`,
                xmlSnapshotsRef.current.set(messageIndex, chartXml)
                saveXmlSnapshots()
                // Check all quota limits
                if (!checkAllQuotaLimits()) return
                sendChatMessage(parts, chartXml, previousXml, sessionId)
                // Token count is tracked in onFinish with actual server usage
@@ -986,7 +992,30 @@ Continue from EXACTLY where you stopped.`,
        saveXmlSnapshots()
    }
-    // Send chat message with headers
+    // Check all quota limits (daily requests, tokens, TPM)
    const checkAllQuotaLimits = (): boolean => {
        // Checks run in a fixed order (daily requests, daily tokens, TPM);
        // the first one that fails shows its toast and blocks the send.
        if (!quotaManager.checkDailyLimit().allowed) {
            quotaManager.showQuotaLimitToast()
            return false
        }

        // The token toast is given the amount already used
        const tokenStatus = quotaManager.checkTokenLimit()
        if (!tokenStatus.allowed) {
            quotaManager.showTokenLimitToast(tokenStatus.used)
            return false
        }

        if (!quotaManager.checkTPMLimit().allowed) {
            quotaManager.showTPMLimitToast()
            return false
        }

        // Every limit has headroom
        return true
    }
    // Send chat message with headers and increment quota
    const sendChatMessage = (
        parts: any,
        xml: string,
@@ -995,7 +1024,6 @@ Continue from EXACTLY where you stopped.`,
    ) => {
        // Reset all retry/continuation state on user-initiated message
        autoRetryCountRef.current = 0
        continuationRetryCountRef.current = 0
        partialXmlRef.current = ""
        const config = getSelectedAIConfig()
@@ -1036,6 +1064,7 @@ Continue from EXACTLY where you stopped.`,
                },
            },
        )
        quotaManager.incrementRequestCount()
    }
    // Process files and append content to user text (handles PDF, text, and optionally images)
@@ -1123,8 +1152,13 @@ Continue from EXACTLY where you stopped.`,
            setMessages(newMessages)
        })
        // Check all quota limits
        if (!checkAllQuotaLimits()) return
        // Now send the message after state is guaranteed to be updated
        sendChatMessage(userParts, savedXml, previousXml, sessionId)
        // Token count is tracked in onFinish with actual server usage
    }
    const handleEditMessage = async (messageIndex: number, newText: string) => {
@@ -1166,8 +1200,12 @@ Continue from EXACTLY where you stopped.`,
            setMessages(newMessages)
        })
        // Check all quota limits
        if (!checkAllQuotaLimits()) return
        // Now send the edited message after state is guaranteed to be updated
        sendChatMessage(newParts, savedXml, previousXml, sessionId)
        // Token count is tracked in onFinish with actual server usage
    }
    // Collapsed view (desktop only)
--- a/lib/ai-providers.ts
+++ b/lib/ai-providers.ts
@@ -95,8 +95,8 @@ function parseIntSafe(
 * Supports various AI SDK providers with their unique configuration options
 *
 * Environment variables:
- * - OPENAI_REASONING_EFFORT: OpenAI reasoning effort level (minimal/low/medium/high) - for o1/o3/o4/gpt-5
+ * - OPENAI_REASONING_EFFORT: OpenAI reasoning effort level (minimal/low/medium/high) - for o1/o3/gpt-5
- * - OPENAI_REASONING_SUMMARY: OpenAI reasoning summary (auto/detailed) - auto-enabled for o1/o3/o4/gpt-5
+ * - OPENAI_REASONING_SUMMARY: OpenAI reasoning summary (none/brief/detailed) - auto-enabled for o1/o3/gpt-5
 * - ANTHROPIC_THINKING_BUDGET_TOKENS: Anthropic thinking budget in tokens (1024-64000)
 * - ANTHROPIC_THINKING_TYPE: Anthropic thinking type (enabled)
 * - GOOGLE_THINKING_BUDGET: Google Gemini 2.5 thinking budget in tokens (1024-100000)
@@ -118,19 +118,18 @@ function buildProviderOptions(
            const reasoningEffort = process.env.OPENAI_REASONING_EFFORT
            const reasoningSummary = process.env.OPENAI_REASONING_SUMMARY
-            // OpenAI reasoning models (o1, o3, o4, gpt-5) need reasoningSummary to return thoughts
+            // OpenAI reasoning models (o1, o3, gpt-5) need reasoningSummary to return thoughts
            if (
                modelId &&
                (modelId.includes("o1") ||
                    modelId.includes("o3") ||
                    modelId.includes("o4") ||
                    modelId.includes("gpt-5"))
            ) {
                options.openai = {
-                    // Auto-enable reasoning summary for reasoning models
+                    // Auto-enable reasoning summary for reasoning models (default: detailed)
                    // Use 'auto' as default since not all models support 'detailed'
                    reasoningSummary:
-                        (reasoningSummary as "auto" | "detailed") || "auto",
+                        (reasoningSummary as "none" | "brief" | "detailed") ||
                        "detailed",
                }
                // Optionally configure reasoning effort
@@ -153,7 +152,8 @@ function buildProviderOptions(
                }
                if (reasoningSummary) {
                    options.openai.reasoningSummary = reasoningSummary as
-                        | "auto"
+                        | "none"
                        | "brief"
                        | "detailed"
                }
            }
@@ -593,9 +593,7 @@ export function getAIModel(overrides?: ClientOverrides): ModelConfig {
                    apiKey,
                    ...(baseURL && { baseURL }),
                })
-                // Use Responses API (default) instead of .chat() to support reasoning
+                model = customOpenAI.chat(modelId)
                // for gpt-5, o1, o3, o4 models. Chat Completions API does not emit reasoning events.
                model = customOpenAI(modelId)
            } else {
                model = openai(modelId)
            }
--- a/lib/dynamo-quota-manager.ts
+++ b/lib/dynamo-quota-manager.ts
@@ -1,238 +0,0 @@
 import {
    ConditionalCheckFailedException,
    DynamoDBClient,
    GetItemCommand,
    UpdateItemCommand,
 } from "@aws-sdk/client-dynamodb"
 // DynamoDB table used for quota tracking. Quota tracking is OPT-IN: it is only
 // enabled when DYNAMODB_QUOTA_TABLE is explicitly set, so deployments that do
 // not need it can simply leave this env var unset.
 const TABLE = process.env.DYNAMODB_QUOTA_TABLE
 // Region for the DynamoDB client; falls back to "ap-northeast-1" when
 // DYNAMODB_REGION is unset or empty.
 const DYNAMODB_REGION = process.env.DYNAMODB_REGION || "ap-northeast-1"
 // Only create the client when a table is configured; null means quota tracking is off.
 const client = TABLE ? new DynamoDBClient({ region: DYNAMODB_REGION }) : null
 /**
 * Report whether server-side quota tracking is active.
 *
 * Tracking is opt-in and is switched on solely by the DYNAMODB_QUOTA_TABLE
 * env var: any non-empty value enables it.
 *
 * @returns true when a quota table is configured, false otherwise.
 */
 export function isQuotaEnabled(): boolean {
    return Boolean(TABLE)
 }
 /**
 * Limits applied to a single caller by checkAndIncrementRequest.
 * A limit of 0 is not enforced: the check substitutes a very large ceiling
 * (999999) for it and skips it when reporting which limit was exceeded.
 */
 interface QuotaLimits {
    requests: number // Daily request limit
    tokens: number // Daily token limit
    tpm: number // Tokens per minute
 }
 /**
 * Outcome of a quota check. The optional fields are only populated when
 * `allowed` is false and a specific limit was identified as exceeded.
 */
 interface QuotaCheckResult {
    allowed: boolean // true when the request may proceed
    error?: string // Human-readable description of the exceeded limit
    type?: "request" | "token" | "tpm" // Which limit was exceeded
    used?: number // Current counter value for that limit
    limit?: number // Configured ceiling for that limit
 }
 /**
 * Check every quota for a caller and, if all pass, count the request.
 *
 * Counters live in one DynamoDB item per caller (`PK = IP#<ip>`). The check
 * runs in two phases:
 *
 * 1. Stale windows are zeroed with conditional writes: the daily counters when
 *    `lastResetDate` is before today, the per-minute counter when `lastMinute`
 *    is before the current minute. Previously the window markers were advanced
 *    without resetting the counters, so a caller who hit a daily limit stayed
 *    limited on following days and `tpmCount` accumulated across minutes.
 * 2. A single conditional update increments `dailyReqCount` only while all
 *    three counters are below their limits, so the check-and-increment itself
 *    stays atomic.
 *
 * The two reset writes make this three writes per request instead of one.
 *
 * @param ip - Caller identifier used to build the partition key.
 * @param limits - Daily request, daily token and tokens-per-minute limits.
 *   A value of 0 is treated as "no limit".
 * @returns `{ allowed: true }` when the request was counted. When a limit is
 *   exceeded, `allowed` is false and `type`, `error`, `used` and `limit`
 *   describe it. Fails open (`allowed: true`) when quota tracking is disabled
 *   or DynamoDB returns an unexpected error.
 */
 export async function checkAndIncrementRequest(
    ip: string,
    limits: QuotaLimits,
 ): Promise<QuotaCheckResult> {
    // Skip if quota tracking not enabled
    if (!client || !TABLE) {
        return { allowed: true }
    }

    // Local aliases keep the non-null narrowing inside the closures below
    const db = client
    const table = TABLE

    const key = { PK: { S: `IP#${ip}` } }
    const today = new Date().toISOString().slice(0, 10) // YYYY-MM-DD (UTC)
    const currentMinute = Math.floor(Date.now() / 60000).toString()
    const ttl = Math.floor(Date.now() / 1000) + 7 * 24 * 60 * 60
    // Only a short prefix of the identifier is ever logged
    const ipPrefix = `${ip.slice(0, 8)}...`

    const describe = (err: unknown): string =>
        err instanceof Error ? err.message : String(err)

    // Run a conditional reset. A failed condition only means the window is
    // still current, so it is swallowed; any other error propagates.
    const resetIfStale = async (command: UpdateItemCommand): Promise<void> => {
        try {
            await db.send(command)
        } catch (err: unknown) {
            if (!(err instanceof ConditionalCheckFailedException)) throw err
        }
    }

    try {
        // Phase 1: zero the counters of any window that has rolled over.
        // Both writes are idempotent under their conditions, so concurrent
        // requests cannot reset a window twice. "<" (not "<>") prevents a
        // delayed request from rolling a newer window back.
        await Promise.all([
            resetIfStale(
                new UpdateItemCommand({
                    TableName: table,
                    Key: key,
                    UpdateExpression:
                        "SET lastResetDate = :today, dailyReqCount = :zero, dailyTokenCount = :zero",
                    ConditionExpression:
                        "attribute_not_exists(lastResetDate) OR lastResetDate < :today",
                    ExpressionAttributeValues: {
                        ":today": { S: today },
                        ":zero": { N: "0" },
                    },
                }),
            ),
            resetIfStale(
                new UpdateItemCommand({
                    TableName: table,
                    Key: key,
                    UpdateExpression:
                        "SET lastMinute = :minute, tpmCount = :zero",
                    ConditionExpression:
                        "attribute_not_exists(lastMinute) OR lastMinute < :minute",
                    ExpressionAttributeValues: {
                        ":minute": { S: currentMinute },
                        ":zero": { N: "0" },
                    },
                }),
            ),
        ])

        // Phase 2: atomic check-and-increment. The write only succeeds while
        // every counter is still below its limit (0 means "no limit").
        await db.send(
            new UpdateItemCommand({
                TableName: table,
                Key: key,
                UpdateExpression:
                    "SET dailyReqCount = if_not_exists(dailyReqCount, :zero) + :one, #ttl = :ttl",
                ConditionExpression: `
                    (attribute_not_exists(dailyReqCount) OR dailyReqCount < :reqLimit) AND
                    (attribute_not_exists(dailyTokenCount) OR dailyTokenCount < :tokenLimit) AND
                    (attribute_not_exists(tpmCount) OR tpmCount < :tpmLimit)
                `,
                ExpressionAttributeNames: { "#ttl": "ttl" },
                ExpressionAttributeValues: {
                    ":zero": { N: "0" },
                    ":one": { N: "1" },
                    ":ttl": { N: String(ttl) },
                    ":reqLimit": { N: String(limits.requests || 999999) },
                    ":tokenLimit": { N: String(limits.tokens || 999999) },
                    ":tpmLimit": { N: String(limits.tpm || 999999) },
                },
            }),
        )
        return { allowed: true }
    } catch (e: unknown) {
        // Anything other than a failed limit condition: fail open
        if (!(e instanceof ConditionalCheckFailedException)) {
            console.error(
                `[quota] DynamoDB error (fail-open), IP prefix: ${ipPrefix}, error: ${describe(e)}`,
            )
            return { allowed: true }
        }

        // Condition failed - read the counters to report which limit was hit
        try {
            const getResult = await db.send(
                new GetItemCommand({
                    TableName: table,
                    Key: key,
                }),
            )
            const item = getResult.Item
            const storedDate = item?.lastResetDate?.S
            const storedMinute = item?.lastMinute?.S

            // Counters belonging to an older window count as zero
            const isNewDay = !storedDate || storedDate < today
            const isNewMinute = !storedMinute || storedMinute < currentMinute
            const dailyReqCount = isNewDay
                ? 0
                : Number(item?.dailyReqCount?.N || 0)
            const dailyTokenCount = isNewDay
                ? 0
                : Number(item?.dailyTokenCount?.N || 0)
            const tpmCount = isNewMinute ? 0 : Number(item?.tpmCount?.N || 0)

            // Determine which limit was exceeded
            if (limits.requests > 0 && dailyReqCount >= limits.requests) {
                return {
                    allowed: false,
                    type: "request",
                    error: "Daily request limit exceeded",
                    used: dailyReqCount,
                    limit: limits.requests,
                }
            }
            if (limits.tokens > 0 && dailyTokenCount >= limits.tokens) {
                return {
                    allowed: false,
                    type: "token",
                    error: "Daily token limit exceeded",
                    used: dailyTokenCount,
                    limit: limits.tokens,
                }
            }
            if (limits.tpm > 0 && tpmCount >= limits.tpm) {
                return {
                    allowed: false,
                    type: "tpm",
                    error: "Rate limit exceeded (tokens per minute)",
                    used: tpmCount,
                    limit: limits.tpm,
                }
            }

            // Condition failed but no limit clearly exceeded - the counters
            // changed between the write and the read. Fail safe by allowing.
            console.warn(
                `[quota] Condition failed but no limit exceeded for IP prefix: ${ipPrefix}`,
            )
            return { allowed: true }
        } catch (getError: unknown) {
            console.error(
                `[quota] Failed to get quota details after condition failure, IP prefix: ${ipPrefix}, error: ${describe(getError)}`,
            )
            return { allowed: true } // Fail open
        }
    }
 }
 /**
 * Record token usage after a response completes.
 *
 * Adds `tokens` to both the daily token counter and the per-minute counter of
 * the caller's item (`PK = IP#<ip>`):
 *
 * - Fast path: if the stored `lastMinute` is the current minute, both counters
 *   are incremented with an atomic ADD.
 * - New minute: otherwise the per-minute counter is restarted at `tokens`.
 *   This write is conditional on the minute still being different, so when two
 *   recordings race across a minute boundary the loser falls back to the ADD
 *   path instead of overwriting the winner's count (the previous unconditional
 *   SET could lose one of the two values).
 *
 * Failures are logged and never thrown: quota bookkeeping must not break the
 * response flow.
 *
 * @param ip - Caller identifier used to build the partition key.
 * @param tokens - Tokens consumed; non-finite or non-positive values are ignored.
 */
 export async function recordTokenUsage(
    ip: string,
    tokens: number,
 ): Promise<void> {
    // Skip if quota tracking not enabled
    if (!client || !TABLE) return
    if (!Number.isFinite(tokens) || tokens <= 0) return

    // Local aliases keep the non-null narrowing inside the closures below
    const db = client
    const table = TABLE

    const currentMinute = Math.floor(Date.now() / 60000).toString()
    const ttl = Math.floor(Date.now() / 1000) + 7 * 24 * 60 * 60
    const key = { PK: { S: `IP#${ip}` } }
    const values = {
        ":minute": { S: currentMinute },
        ":tokens": { N: String(tokens) },
        ":ttl": { N: String(ttl) },
    }

    const describe = (err: unknown): string =>
        err instanceof Error ? err.message : String(err)

    // Same minute: atomically add to both counters
    const addToCurrentMinute = (): UpdateItemCommand =>
        new UpdateItemCommand({
            TableName: table,
            Key: key,
            UpdateExpression:
                "SET #ttl = :ttl ADD dailyTokenCount :tokens, tpmCount :tokens",
            ConditionExpression: "lastMinute = :minute",
            ExpressionAttributeNames: { "#ttl": "ttl" },
            ExpressionAttributeValues: values,
        })

    // Different (or missing) minute: restart the per-minute counter
    const startNewMinute = (): UpdateItemCommand =>
        new UpdateItemCommand({
            TableName: table,
            Key: key,
            UpdateExpression:
                "SET lastMinute = :minute, tpmCount = :tokens, #ttl = :ttl ADD dailyTokenCount :tokens",
            ConditionExpression:
                "attribute_not_exists(lastMinute) OR lastMinute <> :minute",
            ExpressionAttributeNames: { "#ttl": "ttl" },
            ExpressionAttributeValues: values,
        })

    try {
        // Try to update assuming same minute (most common case)
        await db.send(addToCurrentMinute())
        return
    } catch (e: unknown) {
        if (!(e instanceof ConditionalCheckFailedException)) {
            console.error(
                `[quota] Failed to record tokens, IP prefix: ${ip.slice(0, 8)}..., tokens: ${tokens}, error: ${describe(e)}`,
            )
            return
        }
    }

    try {
        try {
            await db.send(startNewMinute())
        } catch (e: unknown) {
            if (!(e instanceof ConditionalCheckFailedException)) throw e
            // Another writer opened this minute first; add to its counter
            await db.send(addToCurrentMinute())
        }
    } catch (retryError: unknown) {
        console.error(
            `[quota] Failed to record tokens (retry), IP prefix: ${ip.slice(0, 8)}..., tokens: ${tokens}, error: ${describe(retryError)}`,
        )
    }
 }
--- a/lib/langfuse.ts
+++ b/lib/langfuse.ts
@@ -21,11 +21,9 @@ export function getLangfuseClient(): LangfuseClient | null {
    return langfuseClient
 }
-// Check if Langfuse is configured (both keys required)
+// Check if Langfuse is configured
 export function isLangfuseEnabled(): boolean {
-    return !!(
+    return !!process.env.LANGFUSE_PUBLIC_KEY
        process.env.LANGFUSE_PUBLIC_KEY && process.env.LANGFUSE_SECRET_KEY
    )
 }
 // Update trace with input data at the start of request
@@ -45,16 +43,34 @@ export function setTraceInput(params: {
 }
 // Update trace with output and end the span
-// Note: AI SDK 6 telemetry automatically reports token usage on its spans,
+export function setTraceOutput(
-// so we only need to set the output text and close our wrapper span
+    output: string,
-export function setTraceOutput(output: string) {
+    usage?: { promptTokens?: number; completionTokens?: number },
 ) {
    if (!isLangfuseEnabled()) return
    updateActiveTrace({ output })
    // End the observe() wrapper span (AI SDK creates its own child spans with usage)
    const activeSpan = api.trace.getActiveSpan()
    if (activeSpan) {
        // Manually set usage attributes since AI SDK Bedrock streaming doesn't provide them
        if (usage?.promptTokens) {
            activeSpan.setAttribute("ai.usage.promptTokens", usage.promptTokens)
            activeSpan.setAttribute(
                "gen_ai.usage.input_tokens",
                usage.promptTokens,
            )
        }
        if (usage?.completionTokens) {
            activeSpan.setAttribute(
                "ai.usage.completionTokens",
                usage.completionTokens,
            )
            activeSpan.setAttribute(
                "gen_ai.usage.output_tokens",
                usage.completionTokens,
            )
        }
        activeSpan.end()
    }
 }
--- a/lib/use-quota-manager.tsx
+++ b/lib/use-quota-manager.tsx
@@ -1,10 +1,11 @@
 "use client"
-import { useCallback } from "react"
+import { useCallback, useMemo } from "react"
 import { toast } from "sonner"
 import { QuotaLimitToast } from "@/components/quota-limit-toast"
 import { useDictionary } from "@/hooks/use-dictionary"
 import { formatMessage } from "@/lib/i18n/utils"
 import { STORAGE_KEYS } from "@/lib/storage"
 export interface QuotaConfig {
    dailyRequestLimit: number
@@ -12,19 +13,134 @@ export interface QuotaConfig {
    tpmLimit: number
 }
 /**
  * Outcome of a single quota check for one limit type.
  */
 export interface QuotaCheckResult {
    /** True when usage is still below the limit, or when no limit applies. */
    allowed: boolean
    /** Quota left in the current window; -1 is reported when no limit applies. */
    remaining: number
    /** Usage counted so far in the current window; 0 is reported when no limit applies. */
    used: number
 }
 /**
- * Hook for displaying quota limit toasts.
+ * Hook for managing request/token quotas and rate limiting.
- * Server-side handles actual quota enforcement via DynamoDB.
+ * Handles three types of limits:
- * This hook only provides UI feedback when limits are exceeded.
+ * - Daily request limit
 * - Daily token limit
 * - Tokens per minute (TPM) rate limit
 *
 * Users with their own API key bypass all limits.
 */
 export function useQuotaManager(config: QuotaConfig): {
    hasOwnApiKey: () => boolean
    checkDailyLimit: () => QuotaCheckResult
    checkTokenLimit: () => QuotaCheckResult
    checkTPMLimit: () => QuotaCheckResult
    incrementRequestCount: () => void
    incrementTokenCount: (tokens: number) => void
    incrementTPMCount: (tokens: number) => void
    showQuotaLimitToast: () => void
    showTokenLimitToast: (used: number) => void
    showTPMLimitToast: () => void
 } {
    const { dailyRequestLimit, dailyTokenLimit, tpmLimit } = config
    const dict = useDictionary()
    // True only when both a provider and an API key are saved in localStorage;
    // users who bring their own key are exempt from the quota checks.
    const hasOwnApiKey = useCallback((): boolean => {
        const savedProvider = localStorage.getItem(STORAGE_KEYS.aiProvider)
        const savedKey = localStorage.getItem(STORAGE_KEYS.aiApiKey)
        if (!savedProvider) return false
        return Boolean(savedKey)
    }, [])
    // Read a numeric counter from localStorage. A missing, empty, or
    // non-numeric entry is treated as zero.
    const parseStorageCount = (key: string): number => {
        const stored = localStorage.getItem(key)
        const parsed = Number.parseInt(stored ? stored : "0", 10)
        if (Number.isNaN(parsed)) {
            return 0
        }
        return parsed
    }
    // Generic helper: Create quota checker factory.
    //
    // Takes a generator for the current time-window key, the localStorage keys
    // holding the stored window marker and its counter, and the limit. Returns
    // a function that reports whether usage in the current window is still
    // below that limit. When the stored window differs from the current one,
    // the checker resets the stored counter to "0" as a side effect.
    const createQuotaChecker = useCallback(
        (
            getTimeKey: () => string,
            timeStorageKey: string,
            countStorageKey: string,
            limit: number,
        ) => {
            return (): QuotaCheckResult => {
                // Own API key or a non-positive limit: no quota applies.
                // remaining: -1 is the "unlimited" sentinel.
                if (hasOwnApiKey() || limit <= 0) {
                    return { allowed: true, remaining: -1, used: 0 }
                }

                const currentTime = getTimeKey()
                const storedTime = localStorage.getItem(timeStorageKey)
                let count = parseStorageCount(countStorageKey)

                // A new window has started: start counting from zero again.
                if (storedTime !== currentTime) {
                    count = 0
                    localStorage.setItem(timeStorageKey, currentTime)
                    localStorage.setItem(countStorageKey, "0")
                }

                return {
                    allowed: count < limit,
                    // Clamp at 0: the counter can overshoot the limit (token
                    // counts grow by arbitrary amounts), and a negative value
                    // could be mistaken for the -1 "unlimited" sentinel.
                    remaining: Math.max(0, limit - count),
                    used: count,
                }
            }
        },
        [hasOwnApiKey],
    )
    // Generic helper: builds a function that adds to a windowed counter kept
    // in localStorage. If the stored window marker differs from the current
    // one, the marker is refreshed and counting restarts from zero before the
    // new amount is added. With validateInput on, non-finite or non-positive
    // amounts are ignored.
    const createQuotaIncrementer = useCallback(
        (
            getTimeKey: () => string,
            timeStorageKey: string,
            countStorageKey: string,
            validateInput: boolean = false,
        ) => {
            return (tokens: number = 1): void => {
                if (validateInput) {
                    const usable = Number.isFinite(tokens) && tokens > 0
                    if (!usable) return
                }

                const windowKey = getTimeKey()
                const previousWindow = localStorage.getItem(timeStorageKey)
                const storedCount = parseStorageCount(countStorageKey)

                const sameWindow = previousWindow === windowKey
                if (!sameWindow) {
                    localStorage.setItem(timeStorageKey, windowKey)
                }

                const base = sameWindow ? storedCount : 0
                localStorage.setItem(countStorageKey, String(base + tokens))
            }
        },
        [],
    )
    // Daily request quota check; the window key is today's date string.
    const checkDailyLimit = useMemo(() => {
        const todayKey = (): string => new Date().toDateString()
        return createQuotaChecker(
            todayKey,
            STORAGE_KEYS.requestDate,
            STORAGE_KEYS.requestCount,
            dailyRequestLimit,
        )
    }, [createQuotaChecker, dailyRequestLimit])
    // Adds to today's request counter (default amount 1, input validation off).
    const incrementRequestCount = useMemo(() => {
        const todayKey = (): string => new Date().toDateString()
        const validateInput = false
        return createQuotaIncrementer(
            todayKey,
            STORAGE_KEYS.requestDate,
            STORAGE_KEYS.requestCount,
            validateInput,
        )
    }, [createQuotaIncrementer])
    // Show quota limit toast (request-based)
    const showQuotaLimitToast = useCallback(() => {
        toast.custom(
@@ -39,6 +155,30 @@ export function useQuotaManager(config: QuotaConfig): {
        )
    }, [dailyRequestLimit])
    // Daily token quota check; the window key is today's date string.
    const checkTokenLimit = useMemo(() => {
        const todayKey = (): string => new Date().toDateString()
        return createQuotaChecker(
            todayKey,
            STORAGE_KEYS.tokenDate,
            STORAGE_KEYS.tokenCount,
            dailyTokenLimit,
        )
    }, [createQuotaChecker, dailyTokenLimit])
    // Adds consumed tokens to today's token counter; invalid amounts are ignored.
    const incrementTokenCount = useMemo(() => {
        const todayKey = (): string => new Date().toDateString()
        const validateInput = true // Validate input tokens
        return createQuotaIncrementer(
            todayKey,
            STORAGE_KEYS.tokenDate,
            STORAGE_KEYS.tokenCount,
            validateInput,
        )
    }, [createQuotaIncrementer])
    // Show token limit toast
    const showTokenLimitToast = useCallback(
        (used: number) => {
@@ -57,6 +197,30 @@ export function useQuotaManager(config: QuotaConfig): {
        [dailyTokenLimit],
    )
    // Tokens-per-minute check; the window key is the current epoch minute.
    const checkTPMLimit = useMemo(() => {
        const minuteKey = (): string => String(Math.floor(Date.now() / 60000))
        return createQuotaChecker(
            minuteKey,
            STORAGE_KEYS.tpmMinute,
            STORAGE_KEYS.tpmCount,
            tpmLimit,
        )
    }, [createQuotaChecker, tpmLimit])
    // Adds consumed tokens to the current minute's counter; invalid amounts are ignored.
    const incrementTPMCount = useMemo(() => {
        const minuteKey = (): string => String(Math.floor(Date.now() / 60000))
        const validateInput = true // Validate input tokens
        return createQuotaIncrementer(
            minuteKey,
            STORAGE_KEYS.tpmMinute,
            STORAGE_KEYS.tpmCount,
            validateInput,
        )
    }, [createQuotaIncrementer])
    // Show TPM limit toast
    const showTPMLimitToast = useCallback(() => {
        const limitDisplay =
@@ -69,6 +233,18 @@ export function useQuotaManager(config: QuotaConfig): {
    }, [tpmLimit, dict])
    return {
        // Check functions
        hasOwnApiKey,
        checkDailyLimit,
        checkTokenLimit,
        checkTPMLimit,
        // Increment functions
        incrementRequestCount,
        incrementTokenCount,
        incrementTPMCount,
        // Toast functions
        showQuotaLimitToast,
        showTokenLimitToast,
        showTPMLimitToast,
--- a/lib/utils.ts
+++ b/lib/utils.ts
@@ -61,47 +61,6 @@ export function isMxCellXmlComplete(xml: string | undefined | null): boolean {
    return trimmed.endsWith("/>") || trimmed.endsWith("</mxCell>")
 }
 /**
 * Extract only complete mxCell elements from partial/streaming XML.
 * This allows progressive rendering during streaming by ignoring incomplete trailing elements.
 *
 * Two forms count as complete:
 * - a self-closing tag: `<mxCell ... />`
 * - an opening tag with content and a closing tag: `<mxCell ...>...</mxCell>`
 *
 * @param xml - The partial XML string (may contain incomplete trailing mxCell)
 * @returns The complete mxCell elements in document order, joined by newlines;
 *          an empty string when `xml` is empty/nullish or has no complete cell
 */
 export function extractCompleteMxCells(xml: string | undefined | null): string {
    if (!xml) return ""

    // One left-to-right scan with the self-closing form tried first at each
    // position. Scanning with two separate patterns is wrong: the opening-tag
    // part `<mxCell\s+[^>]*>` also matches a self-closing `<mxCell .../>`, so
    // the nested pattern would swallow everything up to the next `</mxCell>`
    // and the complete nested cell in between would be lost. A single
    // alternation yields ordered, non-overlapping matches, so no sorting or
    // de-duplication is needed.
    const completeCellPattern =
        /<mxCell\s+[^>]*\/>|<mxCell\s+[^>]*>[\s\S]*?<\/mxCell>/g

    // With the global flag, match() returns every full match (or null).
    return (xml.match(completeCellPattern) ?? []).join("\n")
 }
 // ============================================================================
 // XML Parsing Helpers
 // ============================================================================
--- a/package-lock.json
+++ b/package-lock.json
--- a/package.json
+++ b/package.json
@@ -32,7 +32,6 @@
        "@ai-sdk/google": "^3.0.0",
        "@ai-sdk/openai": "^3.0.0",
        "@ai-sdk/react": "^3.0.1",
        "@aws-sdk/client-dynamodb": "^3.957.0",
        "@aws-sdk/credential-providers": "^3.943.0",
        "@formatjs/intl-localematcher": "^0.7.2",
        "@langfuse/client": "^4.4.9",