Compare commits


1 Commit

Author: dayuan.jiang
SHA1:   76d453b533
Date:   2025-12-23 13:13:21 +09:00

chore: upgrade AI SDK to v6.0.1

- Upgrade ai package from ^5.0.89 to ^6.0.1
- Upgrade @ai-sdk/* provider packages to latest v3/v4
- Update the convertToModelMessages call to async (new API)
- Replace usage.cachedInputTokens with usage.inputTokenDetails?.cacheReadTokens
3 changed files with 17 additions and 39 deletions
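
Two of the commit-message bullets (the async convertToModelMessages and the usage-field rename) touch call sites that are only partly visible in the hunks below. A minimal sketch of both changes as the commit message describes them; the wrapper functions and the loose `any` shapes are illustrative, and only the named APIs come from the commit:

// Sketch: convertToModelMessages is imported from the "ai" package.
import { convertToModelMessages } from "ai"

async function prepareModelMessages(messages: any[]) {
  // v5: const modelMessages = convertToModelMessages(messages)
  // v6: the conversion is async per the commit message, so await it
  return await convertToModelMessages(messages)
}

function cacheReadTokens(usage: any): number {
  // v5: usage.cachedInputTokens
  // v6: nested under inputTokenDetails; optional chaining guards
  // responses that report no cache usage
  return usage.inputTokenDetails?.cacheReadTokens ?? 0
}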

View File

@@ -173,12 +173,9 @@ async function handleChatRequest(req: Request): Promise<Response> {
       : undefined

   // Extract user input text for Langfuse trace
-  // Find the last USER message, not just the last message (which could be assistant in multi-step tool flows)
-  const lastUserMessage = [...messages]
-    .reverse()
-    .find((m: any) => m.role === "user")
+  const lastMessage = messages[messages.length - 1]
   const userInputText =
-    lastUserMessage?.parts?.find((p: any) => p.type === "text")?.text || ""
+    lastMessage?.parts?.find((p: any) => p.type === "text")?.text || ""

   // Update Langfuse trace with input, session, and user
   setTraceInput({
@@ -240,10 +237,9 @@ async function handleChatRequest(req: Request): Promise<Response> {
   // Get the appropriate system prompt based on model (extended for Opus/Haiku 4.5)
   const systemMessage = getSystemPrompt(modelId, minimalStyle)

-  // Extract file parts (images) from the last user message
+  // Extract file parts (images) from the last message
   const fileParts =
-    lastUserMessage?.parts?.filter((part: any) => part.type === "file") ||
-    []
+    lastMessage.parts?.filter((part: any) => part.type === "file") || []

   // User input only - XML is now in a separate cached system message
   const formattedUserInput = `User input:
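
The trace-input extraction now reads the tail of the messages array directly. A small standalone sketch (the two-message history is invented) of how this differs from the removed reverse-search when the conversation ends on an assistant turn:

const messages: any[] = [
  { role: "user", parts: [{ type: "text", text: "Draw a flowchart" }] },
  { role: "assistant", parts: [{ type: "text", text: "<diagram XML>" }] },
]

// New behavior: take the last message regardless of role.
const lastMessage = messages[messages.length - 1]
const userInputText =
  lastMessage?.parts?.find((p: any) => p.type === "text")?.text || ""
// userInputText is now the assistant text, the multi-step tool-flow case
// that the removed "find the last USER message" logic was guarding against.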

View File

@@ -76,7 +76,6 @@ interface ChatPanelProps {
 const TOOL_ERROR_STATE = "output-error" as const
 const DEBUG = process.env.NODE_ENV === "development"
 const MAX_AUTO_RETRY_COUNT = 1
-const MAX_CONTINUATION_RETRY_COUNT = 2 // Limit for truncation continuation retries

 /**
  * Check if auto-resubmit should happen based on tool errors.
@@ -217,8 +216,6 @@ export default function ChatPanel({
   // Ref to track consecutive auto-retry count (reset on user action)
   const autoRetryCountRef = useRef(0)

-  // Ref to track continuation retry count (for truncation handling)
-  const continuationRetryCountRef = useRef(0)

   // Ref to accumulate partial XML when output is truncated due to maxOutputTokens
   // When partialXmlRef.current.length > 0, we're in continuation mode
@@ -659,25 +656,15 @@ Continue from EXACTLY where you stopped.`,
       if (!shouldRetry) {
         // No error, reset retry count and clear state
         autoRetryCountRef.current = 0
-        continuationRetryCountRef.current = 0
         partialXmlRef.current = ""
         return false
       }

-      // Continuation mode: limited retries for truncation handling
+      // Continuation mode: unlimited retries (truncation continuation, not real errors)
+      // Server limits to 5 steps via stepCountIs(5)
       if (isInContinuationMode) {
-        if (
-          continuationRetryCountRef.current >=
-          MAX_CONTINUATION_RETRY_COUNT
-        ) {
-          toast.error(
-            `Continuation retry limit reached (${MAX_CONTINUATION_RETRY_COUNT}). The diagram may be too complex.`,
-          )
-          continuationRetryCountRef.current = 0
-          partialXmlRef.current = ""
-          return false
-        }
-        continuationRetryCountRef.current++
+        // Don't count against retry limit for continuation
+        // Quota checks still apply below
       } else {
         // Regular error: check retry count limit
         if (autoRetryCountRef.current >= MAX_AUTO_RETRY_COUNT) {
@@ -697,7 +684,6 @@ Continue from EXACTLY where you stopped.`,
       if (!tokenLimitCheck.allowed) {
         quotaManager.showTokenLimitToast(tokenLimitCheck.used)
         autoRetryCountRef.current = 0
-        continuationRetryCountRef.current = 0
         partialXmlRef.current = ""
         return false
       }
@@ -706,7 +692,6 @@ Continue from EXACTLY where you stopped.`,
       if (!tpmCheck.allowed) {
         quotaManager.showTPMLimitToast()
         autoRetryCountRef.current = 0
-        continuationRetryCountRef.current = 0
         partialXmlRef.current = ""
         return false
       }
@@ -1039,7 +1024,6 @@ Continue from EXACTLY where you stopped.`,
     ) => {
       // Reset all retry/continuation state on user-initiated message
       autoRetryCountRef.current = 0
-      continuationRetryCountRef.current = 0
       partialXmlRef.current = ""

      const config = getSelectedAIConfig()
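
Taken together, the ChatPanel hunks collapse the retry policy to a single counter. A condensed, standalone paraphrase of the resulting branch logic (refs are flattened to plain objects so the sketch runs outside React, and the quota checks are summarized in a comment):

const MAX_AUTO_RETRY_COUNT = 1
const autoRetryCountRef = { current: 0 }
const partialXmlRef = { current: "" }

function shouldAutoResubmit(
  shouldRetry: boolean,
  isInContinuationMode: boolean,
): boolean {
  if (!shouldRetry) {
    // No error: reset state and stop (first hunk above).
    autoRetryCountRef.current = 0
    partialXmlRef.current = ""
    return false
  }
  if (isInContinuationMode) {
    // Truncation continuations no longer count against a client-side
    // limit; the server bounds the loop via stepCountIs(5).
  } else if (autoRetryCountRef.current >= MAX_AUTO_RETRY_COUNT) {
    return false
  }
  // Token-limit and TPM quota checks still run after this point and can
  // reset both refs and return false (later hunks above).
  return true
}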

View File

@@ -95,8 +95,8 @@ function parseIntSafe(
  * Supports various AI SDK providers with their unique configuration options
  *
  * Environment variables:
- * - OPENAI_REASONING_EFFORT: OpenAI reasoning effort level (minimal/low/medium/high) - for o1/o3/o4/gpt-5
- * - OPENAI_REASONING_SUMMARY: OpenAI reasoning summary (auto/detailed) - auto-enabled for o1/o3/o4/gpt-5
+ * - OPENAI_REASONING_EFFORT: OpenAI reasoning effort level (minimal/low/medium/high) - for o1/o3/gpt-5
+ * - OPENAI_REASONING_SUMMARY: OpenAI reasoning summary (none/brief/detailed) - auto-enabled for o1/o3/gpt-5
  * - ANTHROPIC_THINKING_BUDGET_TOKENS: Anthropic thinking budget in tokens (1024-64000)
  * - ANTHROPIC_THINKING_TYPE: Anthropic thinking type (enabled)
  * - GOOGLE_THINKING_BUDGET: Google Gemini 2.5 thinking budget in tokens (1024-100000)
@@ -118,19 +118,18 @@ function buildProviderOptions(
   const reasoningEffort = process.env.OPENAI_REASONING_EFFORT
   const reasoningSummary = process.env.OPENAI_REASONING_SUMMARY

-  // OpenAI reasoning models (o1, o3, o4, gpt-5) need reasoningSummary to return thoughts
+  // OpenAI reasoning models (o1, o3, gpt-5) need reasoningSummary to return thoughts
   if (
     modelId &&
     (modelId.includes("o1") ||
       modelId.includes("o3") ||
-      modelId.includes("o4") ||
       modelId.includes("gpt-5"))
   ) {
     options.openai = {
-      // Auto-enable reasoning summary for reasoning models
-      // Use 'auto' as default since not all models support 'detailed'
+      // Auto-enable reasoning summary for reasoning models (default: detailed)
       reasoningSummary:
-        (reasoningSummary as "auto" | "detailed") || "auto",
+        (reasoningSummary as "none" | "brief" | "detailed") ||
+        "detailed",
     }

     // Optionally configure reasoning effort
@@ -153,7 +152,8 @@ function buildProviderOptions(
     }
     if (reasoningSummary) {
       options.openai.reasoningSummary = reasoningSummary as
-        | "auto"
+        | "none"
+        | "brief"
         | "detailed"
     }
   }
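
With the new defaults, the reasoning branch resolves as follows for a reasoning model when neither env var is set (a sketch of the resulting object, not code from the repo):

// modelId = "gpt-5", OPENAI_REASONING_SUMMARY and OPENAI_REASONING_EFFORT unset:
const options = {
  openai: {
    reasoningSummary: "detailed" as "none" | "brief" | "detailed",
  },
}
// Before this change the fallback was "auto"; setting
// OPENAI_REASONING_SUMMARY=brief narrows it via the cast shown above.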
@@ -593,9 +593,7 @@ export function getAIModel(overrides?: ClientOverrides): ModelConfig {
         apiKey,
         ...(baseURL && { baseURL }),
       })
-      // Use Responses API (default) instead of .chat() to support reasoning
-      // for gpt-5, o1, o3, o4 models. Chat Completions API does not emit reasoning events.
-      model = customOpenAI(modelId)
+      model = customOpenAI.chat(modelId)
     } else {
       model = openai(modelId)
     }
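
A standalone sketch of the custom-endpoint branch after this last hunk, assuming customOpenAI is built with createOpenAI from @ai-sdk/openai (the factory call sits just above the visible context) and that the env var names here are illustrative:

import { createOpenAI, openai } from "@ai-sdk/openai"

const apiKey = process.env.OPENAI_API_KEY ?? ""
const baseURL = process.env.OPENAI_BASE_URL // illustrative name for an optional proxy URL

const customOpenAI = createOpenAI({
  apiKey,
  ...(baseURL && { baseURL }),
})

// The custom provider now goes through the Chat Completions surface
// instead of the provider default:
const model = customOpenAI.chat("gpt-5")
// The non-custom branch is unchanged: model = openai("gpt-5")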