Compare commits


1 Commit

Author: dayuan.jiang
SHA1:   76d453b533
Date:   2025-12-23 13:13:21 +09:00

chore: upgrade AI SDK to v6.0.1

- Upgrade ai package from ^5.0.89 to ^6.0.1
- Upgrade @ai-sdk/* provider packages to latest v3/v4
- Update the convertToModelMessages call to async (new API)
- Replace usage.cachedInputTokens with usage.inputTokenDetails?.cacheReadTokens
3 changed files with 17 additions and 39 deletions
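
Two of the commit-message bullets (the async convertToModelMessages and the usage-field rename) touch call sites that are only partly visible in the hunks below. A minimal sketch of both changes as the commit message describes them; the wrapper functions and the loose `any` shapes are illustrative, and only the named APIs come from the commit:

// Sketch: convertToModelMessages is imported from the "ai" package.
import { convertToModelMessages } from "ai"

async function prepareModelMessages(messages: any[]) {
  // v5: const modelMessages = convertToModelMessages(messages)
  // v6: the conversion is async per the commit message, so await it
  return await convertToModelMessages(messages)
}

function cacheReadTokens(usage: any): number {
  // v5: usage.cachedInputTokens
  // v6: nested under inputTokenDetails; optional chaining guards
  // responses that report no cache usage
  return usage.inputTokenDetails?.cacheReadTokens ?? 0
}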

View File

@@ -173,12 +173,9 @@ async function handleChatRequest(req: Request): Promise<Response> {
       : undefined

   // Extract user input text for Langfuse trace
-  // Find the last USER message, not just the last message (which could be assistant in multi-step tool flows)
-  const lastUserMessage = [...messages]
-    .reverse()
-    .find((m: any) => m.role === "user")
+  const lastMessage = messages[messages.length - 1]
   const userInputText =
-    lastUserMessage?.parts?.find((p: any) => p.type === "text")?.text || ""
+    lastMessage?.parts?.find((p: any) => p.type === "text")?.text || ""

   // Update Langfuse trace with input, session, and user
   setTraceInput({
@@ -240,10 +237,9 @@ async function handleChatRequest(req: Request): Promise<Response> {
   // Get the appropriate system prompt based on model (extended for Opus/Haiku 4.5)
   const systemMessage = getSystemPrompt(modelId, minimalStyle)

-  // Extract file parts (images) from the last user message
+  // Extract file parts (images) from the last message
   const fileParts =
-    lastUserMessage?.parts?.filter((part: any) => part.type === "file") ||
-    []
+    lastMessage.parts?.filter((part: any) => part.type === "file") || []

   // User input only - XML is now in a separate cached system message
   const formattedUserInput = `User input:
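
The trace-input extraction now reads the tail of the messages array directly. A small standalone sketch (the two-message history is invented) of how this differs from the removed reverse-search when the conversation ends on an assistant turn:

const messages: any[] = [
  { role: "user", parts: [{ type: "text", text: "Draw a flowchart" }] },
  { role: "assistant", parts: [{ type: "text", text: "<diagram XML>" }] },
]

// New behavior: take the last message regardless of role.
const lastMessage = messages[messages.length - 1]
const userInputText =
  lastMessage?.parts?.find((p: any) => p.type === "text")?.text || ""
// userInputText is now the assistant text, the multi-step tool-flow case
// that the removed "find the last USER message" logic was guarding against.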

View File

@@ -76,7 +76,6 @@ interface ChatPanelProps {
 const TOOL_ERROR_STATE = "output-error" as const
 const DEBUG = process.env.NODE_ENV === "development"
 const MAX_AUTO_RETRY_COUNT = 1
-const MAX_CONTINUATION_RETRY_COUNT = 2 // Limit for truncation continuation retries

 /**
  * Check if auto-resubmit should happen based on tool errors.
@@ -217,8 +216,6 @@ export default function ChatPanel({
   // Ref to track consecutive auto-retry count (reset on user action)
   const autoRetryCountRef = useRef(0)

-  // Ref to track continuation retry count (for truncation handling)
-  const continuationRetryCountRef = useRef(0)

   // Ref to accumulate partial XML when output is truncated due to maxOutputTokens
   // When partialXmlRef.current.length > 0, we're in continuation mode
@@ -659,25 +656,15 @@ Continue from EXACTLY where you stopped.`,
       if (!shouldRetry) {
         // No error, reset retry count and clear state
         autoRetryCountRef.current = 0
-        continuationRetryCountRef.current = 0
         partialXmlRef.current = ""
         return false
       }

-      // Continuation mode: limited retries for truncation handling
+      // Continuation mode: unlimited retries (truncation continuation, not real errors)
+      // Server limits to 5 steps via stepCountIs(5)
       if (isInContinuationMode) {
-        if (
-          continuationRetryCountRef.current >=
-          MAX_CONTINUATION_RETRY_COUNT
-        ) {
-          toast.error(
-            `Continuation retry limit reached (${MAX_CONTINUATION_RETRY_COUNT}). The diagram may be too complex.`,
-          )
-          continuationRetryCountRef.current = 0
-          partialXmlRef.current = ""
-          return false
-        }
-        continuationRetryCountRef.current++
+        // Don't count against retry limit for continuation
+        // Quota checks still apply below
       } else {
         // Regular error: check retry count limit
         if (autoRetryCountRef.current >= MAX_AUTO_RETRY_COUNT) {
@@ -697,7 +684,6 @@ Continue from EXACTLY where you stopped.`,
       if (!tokenLimitCheck.allowed) {
         quotaManager.showTokenLimitToast(tokenLimitCheck.used)
         autoRetryCountRef.current = 0
-        continuationRetryCountRef.current = 0
         partialXmlRef.current = ""
         return false
       }
@@ -706,7 +692,6 @@ Continue from EXACTLY where you stopped.`,
       if (!tpmCheck.allowed) {
         quotaManager.showTPMLimitToast()
         autoRetryCountRef.current = 0
-        continuationRetryCountRef.current = 0
         partialXmlRef.current = ""
         return false
       }
@@ -1039,7 +1024,6 @@ Continue from EXACTLY where you stopped.`,
     ) => {
       // Reset all retry/continuation state on user-initiated message
       autoRetryCountRef.current = 0
-      continuationRetryCountRef.current = 0
       partialXmlRef.current = ""

      const config = getSelectedAIConfig()
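
Taken together, the ChatPanel hunks collapse the retry policy to a single counter. A condensed, standalone paraphrase of the resulting branch logic (refs are flattened to plain objects so the sketch runs outside React, and the quota checks are summarized in a comment):

const MAX_AUTO_RETRY_COUNT = 1
const autoRetryCountRef = { current: 0 }
const partialXmlRef = { current: "" }

function shouldAutoResubmit(
  shouldRetry: boolean,
  isInContinuationMode: boolean,
): boolean {
  if (!shouldRetry) {
    // No error: reset state and stop (first hunk above).
    autoRetryCountRef.current = 0
    partialXmlRef.current = ""
    return false
  }
  if (isInContinuationMode) {
    // Truncation continuations no longer count against a client-side
    // limit; the server bounds the loop via stepCountIs(5).
  } else if (autoRetryCountRef.current >= MAX_AUTO_RETRY_COUNT) {
    return false
  }
  // Token-limit and TPM quota checks still run after this point and can
  // reset both refs and return false (later hunks above).
  return true
}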

View File

@@ -95,8 +95,8 @@ function parseIntSafe(
  * Supports various AI SDK providers with their unique configuration options
  *
  * Environment variables:
- * - OPENAI_REASONING_EFFORT: OpenAI reasoning effort level (minimal/low/medium/high) - for o1/o3/o4/gpt-5
- * - OPENAI_REASONING_SUMMARY: OpenAI reasoning summary (auto/detailed) - auto-enabled for o1/o3/o4/gpt-5
+ * - OPENAI_REASONING_EFFORT: OpenAI reasoning effort level (minimal/low/medium/high) - for o1/o3/gpt-5
+ * - OPENAI_REASONING_SUMMARY: OpenAI reasoning summary (none/brief/detailed) - auto-enabled for o1/o3/gpt-5
  * - ANTHROPIC_THINKING_BUDGET_TOKENS: Anthropic thinking budget in tokens (1024-64000)
  * - ANTHROPIC_THINKING_TYPE: Anthropic thinking type (enabled)
  * - GOOGLE_THINKING_BUDGET: Google Gemini 2.5 thinking budget in tokens (1024-100000)
@@ -118,19 +118,18 @@ function buildProviderOptions(
   const reasoningEffort = process.env.OPENAI_REASONING_EFFORT
   const reasoningSummary = process.env.OPENAI_REASONING_SUMMARY

-  // OpenAI reasoning models (o1, o3, o4, gpt-5) need reasoningSummary to return thoughts
+  // OpenAI reasoning models (o1, o3, gpt-5) need reasoningSummary to return thoughts
   if (
     modelId &&
     (modelId.includes("o1") ||
       modelId.includes("o3") ||
-      modelId.includes("o4") ||
       modelId.includes("gpt-5"))
   ) {
     options.openai = {
-      // Auto-enable reasoning summary for reasoning models
-      // Use 'auto' as default since not all models support 'detailed'
+      // Auto-enable reasoning summary for reasoning models (default: detailed)
       reasoningSummary:
-        (reasoningSummary as "auto" | "detailed") || "auto",
+        (reasoningSummary as "none" | "brief" | "detailed") ||
+        "detailed",
     }

     // Optionally configure reasoning effort
@@ -153,7 +152,8 @@ function buildProviderOptions(
     }
     if (reasoningSummary) {
       options.openai.reasoningSummary = reasoningSummary as
-        | "auto"
+        | "none"
+        | "brief"
         | "detailed"
     }
   }
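
With the new defaults, the reasoning branch resolves as follows for a reasoning model when neither env var is set (a sketch of the resulting object, not code from the repo):

// modelId = "gpt-5", OPENAI_REASONING_SUMMARY and OPENAI_REASONING_EFFORT unset:
const options = {
  openai: {
    reasoningSummary: "detailed" as "none" | "brief" | "detailed",
  },
}
// Before this change the fallback was "auto"; setting
// OPENAI_REASONING_SUMMARY=brief narrows it via the cast shown above.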
@@ -593,9 +593,7 @@ export function getAIModel(overrides?: ClientOverrides): ModelConfig {
         apiKey,
         ...(baseURL && { baseURL }),
       })
-      // Use Responses API (default) instead of .chat() to support reasoning
-      // for gpt-5, o1, o3, o4 models. Chat Completions API does not emit reasoning events.
-      model = customOpenAI(modelId)
+      model = customOpenAI.chat(modelId)
     } else {
       model = openai(modelId)
     }
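
A standalone sketch of the custom-endpoint branch after this last hunk, assuming customOpenAI is built with createOpenAI from @ai-sdk/openai (the factory call sits just above the visible context) and that the env var names here are illustrative:

import { createOpenAI, openai } from "@ai-sdk/openai"

const apiKey = process.env.OPENAI_API_KEY ?? ""
const baseURL = process.env.OPENAI_BASE_URL // illustrative name for an optional proxy URL

const customOpenAI = createOpenAI({
  apiKey,
  ...(baseURL && { baseURL }),
})

// The custom provider now goes through the Chat Completions surface
// instead of the provider default:
const model = customOpenAI.chat("gpt-5")
// The non-custom branch is unchanged: model = openai("gpt-5")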