Compare commits

..

4 Commits

Author SHA1 Message Date
dayuan.jiang
5a9fed2d31 fix: add continuation retry limit for truncated diagrams
Previously, continuation mode (for truncated XML) had unlimited client-side
retries, relying only on server stepCountIs(5) limit. This could cause
excessive API calls (495 observed) when XML truncation kept occurring.

Added MAX_CONTINUATION_RETRY_COUNT=2 to limit continuation attempts:
- After 2 failed continuation attempts, shows error toast and stops
- Resets on successful completion or user-initiated message
- Also resets when quota limits are hit
2025-12-23 14:10:51 +09:00
Dayuan Jiang
a0fbc0ad33 fix: use last user message for Langfuse trace input (#371)
In multi-step tool flows, messages array contains assistant messages
from previous steps. Using messages[messages.length - 1] would record
the assistant's response as trace input instead of the user's question.
2025-12-23 13:43:28 +09:00
Dayuan Jiang
0385c45a10 fix: OpenAI reasoning/thinking blocks not showing (#370)
- Use Responses API instead of Chat Completions API for OpenAI
  (.chat() -> default call) to support reasoning events
- Add o4 to reasoning model detection
- Change default reasoningSummary from 'detailed' to 'auto'
  (not all models support 'detailed')
- Update types to match AI SDK: 'auto' | 'detailed'
2025-12-23 13:38:50 +09:00
Dayuan Jiang
5262b7bfb2 chore: upgrade AI SDK to v6.0.1 (#369)
- Upgrade ai package from ^5.0.89 to ^6.0.1
- Upgrade @ai-sdk/* provider packages to latest v3/v4
- Update convertToModelMessages call to async (new API)
- Fix usage.cachedInputTokens to usage.inputTokenDetails?.cacheReadTokens
2025-12-23 13:31:42 +09:00
3 changed files with 39 additions and 17 deletions

View File

@@ -173,9 +173,12 @@ async function handleChatRequest(req: Request): Promise<Response> {
: undefined
// Extract user input text for Langfuse trace
const lastMessage = messages[messages.length - 1]
// Find the last USER message, not just the last message (which could be assistant in multi-step tool flows)
const lastUserMessage = [...messages]
.reverse()
.find((m: any) => m.role === "user")
const userInputText =
lastMessage?.parts?.find((p: any) => p.type === "text")?.text || ""
lastUserMessage?.parts?.find((p: any) => p.type === "text")?.text || ""
// Update Langfuse trace with input, session, and user
setTraceInput({
@@ -237,9 +240,10 @@ async function handleChatRequest(req: Request): Promise<Response> {
// Get the appropriate system prompt based on model (extended for Opus/Haiku 4.5)
const systemMessage = getSystemPrompt(modelId, minimalStyle)
// Extract file parts (images) from the last message
// Extract file parts (images) from the last user message
const fileParts =
lastMessage.parts?.filter((part: any) => part.type === "file") || []
lastUserMessage?.parts?.filter((part: any) => part.type === "file") ||
[]
// User input only - XML is now in a separate cached system message
const formattedUserInput = `User input:

View File

@@ -76,6 +76,7 @@ interface ChatPanelProps {
const TOOL_ERROR_STATE = "output-error" as const
const DEBUG = process.env.NODE_ENV === "development"
const MAX_AUTO_RETRY_COUNT = 1
const MAX_CONTINUATION_RETRY_COUNT = 2 // Limit for truncation continuation retries
/**
* Check if auto-resubmit should happen based on tool errors.
@@ -216,6 +217,8 @@ export default function ChatPanel({
// Ref to track consecutive auto-retry count (reset on user action)
const autoRetryCountRef = useRef(0)
// Ref to track continuation retry count (for truncation handling)
const continuationRetryCountRef = useRef(0)
// Ref to accumulate partial XML when output is truncated due to maxOutputTokens
// When partialXmlRef.current.length > 0, we're in continuation mode
@@ -656,15 +659,25 @@ Continue from EXACTLY where you stopped.`,
if (!shouldRetry) {
// No error, reset retry count and clear state
autoRetryCountRef.current = 0
continuationRetryCountRef.current = 0
partialXmlRef.current = ""
return false
}
// Continuation mode: unlimited retries (truncation continuation, not real errors)
// Server limits to 5 steps via stepCountIs(5)
// Continuation mode: limited retries for truncation handling
if (isInContinuationMode) {
// Don't count against retry limit for continuation
// Quota checks still apply below
if (
continuationRetryCountRef.current >=
MAX_CONTINUATION_RETRY_COUNT
) {
toast.error(
`Continuation retry limit reached (${MAX_CONTINUATION_RETRY_COUNT}). The diagram may be too complex.`,
)
continuationRetryCountRef.current = 0
partialXmlRef.current = ""
return false
}
continuationRetryCountRef.current++
} else {
// Regular error: check retry count limit
if (autoRetryCountRef.current >= MAX_AUTO_RETRY_COUNT) {
@@ -684,6 +697,7 @@ Continue from EXACTLY where you stopped.`,
if (!tokenLimitCheck.allowed) {
quotaManager.showTokenLimitToast(tokenLimitCheck.used)
autoRetryCountRef.current = 0
continuationRetryCountRef.current = 0
partialXmlRef.current = ""
return false
}
@@ -692,6 +706,7 @@ Continue from EXACTLY where you stopped.`,
if (!tpmCheck.allowed) {
quotaManager.showTPMLimitToast()
autoRetryCountRef.current = 0
continuationRetryCountRef.current = 0
partialXmlRef.current = ""
return false
}
@@ -1024,6 +1039,7 @@ Continue from EXACTLY where you stopped.`,
) => {
// Reset all retry/continuation state on user-initiated message
autoRetryCountRef.current = 0
continuationRetryCountRef.current = 0
partialXmlRef.current = ""
const config = getSelectedAIConfig()

View File

@@ -95,8 +95,8 @@ function parseIntSafe(
* Supports various AI SDK providers with their unique configuration options
*
* Environment variables:
* - OPENAI_REASONING_EFFORT: OpenAI reasoning effort level (minimal/low/medium/high) - for o1/o3/gpt-5
* - OPENAI_REASONING_SUMMARY: OpenAI reasoning summary (none/brief/detailed) - auto-enabled for o1/o3/gpt-5
* - OPENAI_REASONING_EFFORT: OpenAI reasoning effort level (minimal/low/medium/high) - for o1/o3/o4/gpt-5
* - OPENAI_REASONING_SUMMARY: OpenAI reasoning summary (auto/detailed) - auto-enabled for o1/o3/o4/gpt-5
* - ANTHROPIC_THINKING_BUDGET_TOKENS: Anthropic thinking budget in tokens (1024-64000)
* - ANTHROPIC_THINKING_TYPE: Anthropic thinking type (enabled)
* - GOOGLE_THINKING_BUDGET: Google Gemini 2.5 thinking budget in tokens (1024-100000)
@@ -118,18 +118,19 @@ function buildProviderOptions(
const reasoningEffort = process.env.OPENAI_REASONING_EFFORT
const reasoningSummary = process.env.OPENAI_REASONING_SUMMARY
// OpenAI reasoning models (o1, o3, gpt-5) need reasoningSummary to return thoughts
// OpenAI reasoning models (o1, o3, o4, gpt-5) need reasoningSummary to return thoughts
if (
modelId &&
(modelId.includes("o1") ||
modelId.includes("o3") ||
modelId.includes("o4") ||
modelId.includes("gpt-5"))
) {
options.openai = {
// Auto-enable reasoning summary for reasoning models (default: detailed)
// Auto-enable reasoning summary for reasoning models
// Use 'auto' as default since not all models support 'detailed'
reasoningSummary:
(reasoningSummary as "none" | "brief" | "detailed") ||
"detailed",
(reasoningSummary as "auto" | "detailed") || "auto",
}
// Optionally configure reasoning effort
@@ -152,8 +153,7 @@ function buildProviderOptions(
}
if (reasoningSummary) {
options.openai.reasoningSummary = reasoningSummary as
| "none"
| "brief"
| "auto"
| "detailed"
}
}
@@ -593,7 +593,9 @@ export function getAIModel(overrides?: ClientOverrides): ModelConfig {
apiKey,
...(baseURL && { baseURL }),
})
model = customOpenAI.chat(modelId)
// Use Responses API (default) instead of .chat() to support reasoning
// for gpt-5, o1, o3, o4 models. Chat Completions API does not emit reasoning events.
model = customOpenAI(modelId)
} else {
model = openai(modelId)
}