mirror of https://github.com/DayuanJiang/next-ai-draw-io.git
synced 2026-01-02 22:32:27 +08:00

Compare commits

4 Commits (chore/upgr...fix/contin)
| Author | SHA1 | Date |
|---|---|---|
|  | 5a9fed2d31 |  |
|  | a0fbc0ad33 |  |
|  | 0385c45a10 |  |
|  | 5262b7bfb2 |  |
```diff
@@ -173,9 +173,12 @@ async function handleChatRequest(req: Request): Promise<Response> {
           : undefined

     // Extract user input text for Langfuse trace
-    const lastMessage = messages[messages.length - 1]
+    // Find the last USER message, not just the last message (which could be assistant in multi-step tool flows)
+    const lastUserMessage = [...messages]
+        .reverse()
+        .find((m: any) => m.role === "user")
     const userInputText =
-        lastMessage?.parts?.find((p: any) => p.type === "text")?.text || ""
+        lastUserMessage?.parts?.find((p: any) => p.type === "text")?.text || ""

     // Update Langfuse trace with input, session, and user
     setTraceInput({
```
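This hunk swaps a positional lookup for a role-based one. A minimal sketch of the pattern, assuming the AI SDK's UIMessage-like shape of `{ role, parts }`; the copy-then-reverse scan finds the newest user turn even when the array ends with assistant or tool messages from a multi-step flow:

```ts
// Minimal sketch, assuming a UIMessage-like shape ({ role, parts }).
type Part = { type: string; text?: string }
type Message = { role: "user" | "assistant" | "system"; parts?: Part[] }

function lastUserText(messages: Message[]): string {
    // Copy before reversing so the original history is not mutated.
    const lastUser = [...messages].reverse().find((m) => m.role === "user")
    return lastUser?.parts?.find((p) => p.type === "text")?.text ?? ""
}

// The trailing assistant message is skipped:
console.log(
    lastUserText([
        { role: "user", parts: [{ type: "text", text: "draw a flowchart" }] },
        { role: "assistant", parts: [{ type: "text", text: "<mxfile>...</mxfile>" }] },
    ]),
) // -> "draw a flowchart"
```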
```diff
@@ -237,9 +240,10 @@ async function handleChatRequest(req: Request): Promise<Response> {
     // Get the appropriate system prompt based on model (extended for Opus/Haiku 4.5)
     const systemMessage = getSystemPrompt(modelId, minimalStyle)

-    // Extract file parts (images) from the last message
+    // Extract file parts (images) from the last user message
     const fileParts =
-        lastMessage.parts?.filter((part: any) => part.type === "file") || []
+        lastUserMessage?.parts?.filter((part: any) => part.type === "file") ||
+        []

     // User input only - XML is now in a separate cached system message
     const formattedUserInput = `User input:
```
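The same guard carries over to attachments. A companion sketch, reusing the `Message` and `Part` types from the sketch above; the `?? []` fallback keeps the result a plain array even when the history has no user turn yet:

```ts
// Same reverse-scan guard, applied to file parts (hypothetical helper).
function userFileParts(messages: Message[]): Part[] {
    const lastUser = [...messages].reverse().find((m) => m.role === "user")
    return lastUser?.parts?.filter((p) => p.type === "file") ?? []
}
```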
```diff
@@ -76,6 +76,7 @@ interface ChatPanelProps {
 const TOOL_ERROR_STATE = "output-error" as const
 const DEBUG = process.env.NODE_ENV === "development"
 const MAX_AUTO_RETRY_COUNT = 1
+const MAX_CONTINUATION_RETRY_COUNT = 2 // Limit for truncation continuation retries

 /**
  * Check if auto-resubmit should happen based on tool errors.
```
```diff
@@ -216,6 +217,8 @@ export default function ChatPanel({

     // Ref to track consecutive auto-retry count (reset on user action)
     const autoRetryCountRef = useRef(0)
+    // Ref to track continuation retry count (for truncation handling)
+    const continuationRetryCountRef = useRef(0)

     // Ref to accumulate partial XML when output is truncated due to maxOutputTokens
     // When partialXmlRef.current.length > 0, we're in continuation mode
```
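Both counters live in refs rather than component state. A sketch of the idea, with `useRetryCounters` as a hypothetical extraction (the repo keeps the refs inline): bumping `.current` inside the retry decision does not schedule a re-render, which is what you want for bookkeeping that runs mid-stream:

```ts
// Sketch: retry bookkeeping in refs, so updates never trigger re-renders.
// useRetryCounters is a hypothetical extraction, not code from the repo.
import { useRef } from "react"

const MAX_AUTO_RETRY_COUNT = 1
const MAX_CONTINUATION_RETRY_COUNT = 2

function useRetryCounters() {
    const autoRetryCountRef = useRef(0)
    const continuationRetryCountRef = useRef(0)
    const resetAll = () => {
        autoRetryCountRef.current = 0
        continuationRetryCountRef.current = 0
    }
    return { autoRetryCountRef, continuationRetryCountRef, resetAll }
}
```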
```diff
@@ -656,15 +659,25 @@ Continue from EXACTLY where you stopped.`,
         if (!shouldRetry) {
             // No error, reset retry count and clear state
             autoRetryCountRef.current = 0
+            continuationRetryCountRef.current = 0
             partialXmlRef.current = ""
             return false
         }

-        // Continuation mode: unlimited retries (truncation continuation, not real errors)
-        // Server limits to 5 steps via stepCountIs(5)
+        // Continuation mode: limited retries for truncation handling
         if (isInContinuationMode) {
-            // Don't count against retry limit for continuation
-            // Quota checks still apply below
+            if (
+                continuationRetryCountRef.current >=
+                MAX_CONTINUATION_RETRY_COUNT
+            ) {
+                toast.error(
+                    `Continuation retry limit reached (${MAX_CONTINUATION_RETRY_COUNT}). The diagram may be too complex.`,
+                )
+                continuationRetryCountRef.current = 0
+                partialXmlRef.current = ""
+                return false
+            }
+            continuationRetryCountRef.current++
         } else {
             // Regular error: check retry count limit
             if (autoRetryCountRef.current >= MAX_AUTO_RETRY_COUNT) {
```
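The branching above reduces to a small decision table. A pure-function sketch (hypothetical, not in the repo) that makes the two budgets explicit; quota checks and toasts stay outside it:

```ts
// Hypothetical pure-function restatement of the retry gate above.
type RetryDecision = { retry: boolean; reason?: string }

function decideRetry(opts: {
    shouldRetry: boolean
    isInContinuationMode: boolean
    autoRetryCount: number
    continuationRetryCount: number
}): RetryDecision {
    if (!opts.shouldRetry) return { retry: false }
    if (opts.isInContinuationMode) {
        // Truncation continuations get their own, slightly larger budget.
        if (opts.continuationRetryCount >= 2) {
            return { retry: false, reason: "continuation limit reached" }
        }
        return { retry: true }
    }
    if (opts.autoRetryCount >= 1) {
        return { retry: false, reason: "auto-retry limit reached" }
    }
    return { retry: true }
}
```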
```diff
@@ -684,6 +697,7 @@ Continue from EXACTLY where you stopped.`,
         if (!tokenLimitCheck.allowed) {
             quotaManager.showTokenLimitToast(tokenLimitCheck.used)
             autoRetryCountRef.current = 0
+            continuationRetryCountRef.current = 0
             partialXmlRef.current = ""
             return false
         }
```
```diff
@@ -692,6 +706,7 @@ Continue from EXACTLY where you stopped.`,
         if (!tpmCheck.allowed) {
             quotaManager.showTPMLimitToast()
             autoRetryCountRef.current = 0
+            continuationRetryCountRef.current = 0
             partialXmlRef.current = ""
             return false
         }
```
```diff
@@ -1024,6 +1039,7 @@ Continue from EXACTLY where you stopped.`,
     ) => {
         // Reset all retry/continuation state on user-initiated message
         autoRetryCountRef.current = 0
+        continuationRetryCountRef.current = 0
         partialXmlRef.current = ""

         const config = getSelectedAIConfig()
```
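With this last hunk the same three-field reset now appears on four exit paths: the no-error case, both quota checks, and the user-send handler. A hypothetical helper would keep that invariant in one place:

```ts
// Hypothetical consolidation of the repeated reset (not in the repo).
function resetRetryState(
    autoRetryCountRef: { current: number },
    continuationRetryCountRef: { current: number },
    partialXmlRef: { current: string },
): void {
    autoRetryCountRef.current = 0
    continuationRetryCountRef.current = 0
    partialXmlRef.current = ""
}
```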
```diff
@@ -95,8 +95,8 @@ function parseIntSafe(
  * Supports various AI SDK providers with their unique configuration options
  *
  * Environment variables:
- * - OPENAI_REASONING_EFFORT: OpenAI reasoning effort level (minimal/low/medium/high) - for o1/o3/gpt-5
- * - OPENAI_REASONING_SUMMARY: OpenAI reasoning summary (none/brief/detailed) - auto-enabled for o1/o3/gpt-5
+ * - OPENAI_REASONING_EFFORT: OpenAI reasoning effort level (minimal/low/medium/high) - for o1/o3/o4/gpt-5
+ * - OPENAI_REASONING_SUMMARY: OpenAI reasoning summary (auto/detailed) - auto-enabled for o1/o3/o4/gpt-5
  * - ANTHROPIC_THINKING_BUDGET_TOKENS: Anthropic thinking budget in tokens (1024-64000)
  * - ANTHROPIC_THINKING_TYPE: Anthropic thinking type (enabled)
  * - GOOGLE_THINKING_BUDGET: Google Gemini 2.5 thinking budget in tokens (1024-100000)
```
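The doc comment now matches what the code accepts. A sketch of reading `OPENAI_REASONING_SUMMARY` against an explicit whitelist rather than a bare `as` cast; `readReasoningSummary` is a hypothetical helper, and the `"auto"` fallback matches the default the hunks below install:

```ts
// Hypothetical validated read of OPENAI_REASONING_SUMMARY.
const REASONING_SUMMARIES = ["auto", "detailed"] as const
type ReasoningSummary = (typeof REASONING_SUMMARIES)[number]

function readReasoningSummary(): ReasoningSummary {
    const raw = process.env.OPENAI_REASONING_SUMMARY
    return raw && (REASONING_SUMMARIES as readonly string[]).includes(raw)
        ? (raw as ReasoningSummary)
        : "auto"
}
```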
```diff
@@ -118,18 +118,19 @@ function buildProviderOptions(
     const reasoningEffort = process.env.OPENAI_REASONING_EFFORT
     const reasoningSummary = process.env.OPENAI_REASONING_SUMMARY

-    // OpenAI reasoning models (o1, o3, gpt-5) need reasoningSummary to return thoughts
+    // OpenAI reasoning models (o1, o3, o4, gpt-5) need reasoningSummary to return thoughts
     if (
         modelId &&
         (modelId.includes("o1") ||
             modelId.includes("o3") ||
+            modelId.includes("o4") ||
             modelId.includes("gpt-5"))
     ) {
         options.openai = {
-            // Auto-enable reasoning summary for reasoning models (default: detailed)
+            // Auto-enable reasoning summary for reasoning models
+            // Use 'auto' as default since not all models support 'detailed'
             reasoningSummary:
-                (reasoningSummary as "none" | "brief" | "detailed") ||
-                "detailed",
+                (reasoningSummary as "auto" | "detailed") || "auto",
         }

         // Optionally configure reasoning effort
```
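For context, a sketch of where `buildProviderOptions`' output lands, assuming AI SDK v5's `streamText` and the `@ai-sdk/openai` provider; the `openai` block mirrors what the hunk above produces for a reasoning model:

```ts
// Sketch only: providerOptions as streamText would receive them.
import { streamText } from "ai"
import { openai } from "@ai-sdk/openai"

async function main() {
    const result = streamText({
        model: openai("gpt-5"),
        prompt: "Draw a login flow as draw.io XML",
        providerOptions: {
            // Mirrors buildProviderOptions for a reasoning model.
            openai: { reasoningSummary: "auto", reasoningEffort: "medium" },
        },
    })
    for await (const chunk of result.textStream) process.stdout.write(chunk)
}

main()
```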
```diff
@@ -152,8 +153,7 @@ function buildProviderOptions(
         }
         if (reasoningSummary) {
             options.openai.reasoningSummary = reasoningSummary as
-                | "none"
-                | "brief"
+                | "auto"
                 | "detailed"
         }
     }
```
```diff
@@ -593,7 +593,9 @@ export function getAIModel(overrides?: ClientOverrides): ModelConfig {
             apiKey,
             ...(baseURL && { baseURL }),
         })
-        model = customOpenAI.chat(modelId)
+        // Use Responses API (default) instead of .chat() to support reasoning
+        // for gpt-5, o1, o3, o4 models. Chat Completions API does not emit reasoning events.
+        model = customOpenAI(modelId)
     } else {
         model = openai(modelId)
     }
```
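The distinction the comment draws is between the provider's two call shapes. A sketch, assuming `@ai-sdk/openai`'s `createOpenAI`: calling the provider instance directly yields a Responses API model (which can stream reasoning events), while `.chat()` pins Chat Completions:

```ts
import { createOpenAI } from "@ai-sdk/openai"

const customOpenAI = createOpenAI({
    apiKey: process.env.OPENAI_API_KEY!,
    // baseURL is optional; set it only for OpenAI-compatible gateways.
})

const responsesModel = customOpenAI("o3") // Responses API (default)
const chatModel = customOpenAI.chat("o3") // Chat Completions API
```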