Mirror of https://github.com/DayuanJiang/next-ai-draw-io.git
Synced 2026-01-03 14:52:28 +08:00

Compare commits: fix/contin...chore/upgr (1 commit)

| Author | SHA1 | Date |
|---|---|---|
| | 76d453b533 | |
@@ -173,12 +173,9 @@ async function handleChatRequest(req: Request): Promise<Response> {
               : undefined

     // Extract user input text for Langfuse trace
-    // Find the last USER message, not just the last message (which could be assistant in multi-step tool flows)
-    const lastUserMessage = [...messages]
-        .reverse()
-        .find((m: any) => m.role === "user")
+    const lastMessage = messages[messages.length - 1]
     const userInputText =
-        lastUserMessage?.parts?.find((p: any) => p.type === "text")?.text || ""
+        lastMessage?.parts?.find((p: any) => p.type === "text")?.text || ""

     // Update Langfuse trace with input, session, and user
     setTraceInput({
@@ -240,10 +237,9 @@ async function handleChatRequest(req: Request): Promise<Response> {
     // Get the appropriate system prompt based on model (extended for Opus/Haiku 4.5)
     const systemMessage = getSystemPrompt(modelId, minimalStyle)

-    // Extract file parts (images) from the last user message
+    // Extract file parts (images) from the last message
     const fileParts =
-        lastUserMessage?.parts?.filter((part: any) => part.type === "file") ||
-        []
+        lastMessage.parts?.filter((part: any) => part.type === "file") || []

     // User input only - XML is now in a separate cached system message
     const formattedUserInput = `User input:
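
For context, the two hunks above operate on AI SDK UI messages. A minimal standalone sketch of the new behavior follows; the type shapes are simplified assumptions, not the repo's actual types:

```ts
// Simplified stand-ins for the AI SDK's UIMessage part types (assumption).
type Part = { type: string; text?: string }
type UIMessage = { role: "user" | "assistant" | "system"; parts?: Part[] }

// Mirrors the new code path: take the last message as-is and pull out its
// text and file parts. Note the trade-off the removed comment guarded
// against: in multi-step tool flows the last message can be an assistant
// message, in which case userInputText comes back empty.
function extractUserInput(messages: UIMessage[]) {
    const lastMessage = messages[messages.length - 1]
    const userInputText =
        lastMessage?.parts?.find((p) => p.type === "text")?.text || ""
    const fileParts =
        lastMessage?.parts?.filter((p) => p.type === "file") || []
    return { userInputText, fileParts }
}
```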
@@ -76,7 +76,6 @@ interface ChatPanelProps {
 const TOOL_ERROR_STATE = "output-error" as const
 const DEBUG = process.env.NODE_ENV === "development"
 const MAX_AUTO_RETRY_COUNT = 1
-const MAX_CONTINUATION_RETRY_COUNT = 2 // Limit for truncation continuation retries

 /**
  * Check if auto-resubmit should happen based on tool errors.
@@ -217,8 +216,6 @@ export default function ChatPanel({

     // Ref to track consecutive auto-retry count (reset on user action)
     const autoRetryCountRef = useRef(0)
-    // Ref to track continuation retry count (for truncation handling)
-    const continuationRetryCountRef = useRef(0)

     // Ref to accumulate partial XML when output is truncated due to maxOutputTokens
     // When partialXmlRef.current.length > 0, we're in continuation mode
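
After these hunks the client keeps one retry counter plus the partial-XML buffer, with continuation mode derived from the buffer. A sketch, wrapped in a hypothetical useRetryState hook purely for readability (ref names come from the diff):

```ts
import { useRef } from "react"

function useRetryState() {
    // Consecutive auto-retry count, reset on user action (per the diff).
    const autoRetryCountRef = useRef(0)
    // Accumulates partial XML when output is truncated by maxOutputTokens.
    const partialXmlRef = useRef("")
    // Continuation mode is simply "the buffer is non-empty".
    const isInContinuationMode = () => partialXmlRef.current.length > 0
    return { autoRetryCountRef, partialXmlRef, isInContinuationMode }
}
```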
@@ -659,25 +656,15 @@ Continue from EXACTLY where you stopped.`,
         if (!shouldRetry) {
             // No error, reset retry count and clear state
             autoRetryCountRef.current = 0
-            continuationRetryCountRef.current = 0
             partialXmlRef.current = ""
             return false
         }

-        // Continuation mode: limited retries for truncation handling
+        // Continuation mode: unlimited retries (truncation continuation, not real errors)
+        // Server limits to 5 steps via stepCountIs(5)
         if (isInContinuationMode) {
-            if (
-                continuationRetryCountRef.current >=
-                MAX_CONTINUATION_RETRY_COUNT
-            ) {
-                toast.error(
-                    `Continuation retry limit reached (${MAX_CONTINUATION_RETRY_COUNT}). The diagram may be too complex.`,
-                )
-                continuationRetryCountRef.current = 0
-                partialXmlRef.current = ""
-                return false
-            }
-            continuationRetryCountRef.current++
+            // Don't count against retry limit for continuation
+            // Quota checks still apply below
         } else {
             // Regular error: check retry count limit
             if (autoRetryCountRef.current >= MAX_AUTO_RETRY_COUNT) {
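
Stripped of component wiring, the retry gate after this change looks roughly like the following; the function name and argument list are illustrative, since the real logic lives inline in ChatPanel:

```ts
const MAX_AUTO_RETRY_COUNT = 1

function shouldAutoResubmit(
    shouldRetry: boolean,
    isInContinuationMode: boolean,
    autoRetryCount: number,
): boolean {
    if (!shouldRetry) return false // no error: caller resets state
    if (isInContinuationMode) {
        // Truncation continuation is not treated as a real error, so it is
        // no longer capped client-side; the server bounds total work via
        // stepCountIs(5), and the quota checks still apply.
        return true
    }
    // Real tool errors remain capped.
    return autoRetryCount < MAX_AUTO_RETRY_COUNT
}
```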
@@ -697,7 +684,6 @@ Continue from EXACTLY where you stopped.`,
         if (!tokenLimitCheck.allowed) {
             quotaManager.showTokenLimitToast(tokenLimitCheck.used)
             autoRetryCountRef.current = 0
-            continuationRetryCountRef.current = 0
             partialXmlRef.current = ""
             return false
         }
@@ -706,7 +692,6 @@ Continue from EXACTLY where you stopped.`,
         if (!tpmCheck.allowed) {
             quotaManager.showTPMLimitToast()
             autoRetryCountRef.current = 0
-            continuationRetryCountRef.current = 0
             partialXmlRef.current = ""
             return false
         }
@@ -1039,7 +1024,6 @@ Continue from EXACTLY where you stopped.`,
     ) => {
         // Reset all retry/continuation state on user-initiated message
         autoRetryCountRef.current = 0
-        continuationRetryCountRef.current = 0
         partialXmlRef.current = ""

         const config = getSelectedAIConfig()
@@ -95,8 +95,8 @@ function parseIntSafe(
  * Supports various AI SDK providers with their unique configuration options
  *
  * Environment variables:
- * - OPENAI_REASONING_EFFORT: OpenAI reasoning effort level (minimal/low/medium/high) - for o1/o3/o4/gpt-5
- * - OPENAI_REASONING_SUMMARY: OpenAI reasoning summary (auto/detailed) - auto-enabled for o1/o3/o4/gpt-5
+ * - OPENAI_REASONING_EFFORT: OpenAI reasoning effort level (minimal/low/medium/high) - for o1/o3/gpt-5
+ * - OPENAI_REASONING_SUMMARY: OpenAI reasoning summary (none/brief/detailed) - auto-enabled for o1/o3/gpt-5
  * - ANTHROPIC_THINKING_BUDGET_TOKENS: Anthropic thinking budget in tokens (1024-64000)
  * - ANTHROPIC_THINKING_TYPE: Anthropic thinking type (enabled)
  * - GOOGLE_THINKING_BUDGET: Google Gemini 2.5 thinking budget in tokens (1024-100000)
@@ -118,19 +118,18 @@ function buildProviderOptions(
     const reasoningEffort = process.env.OPENAI_REASONING_EFFORT
     const reasoningSummary = process.env.OPENAI_REASONING_SUMMARY

-    // OpenAI reasoning models (o1, o3, o4, gpt-5) need reasoningSummary to return thoughts
+    // OpenAI reasoning models (o1, o3, gpt-5) need reasoningSummary to return thoughts
     if (
         modelId &&
         (modelId.includes("o1") ||
             modelId.includes("o3") ||
-            modelId.includes("o4") ||
             modelId.includes("gpt-5"))
     ) {
         options.openai = {
-            // Auto-enable reasoning summary for reasoning models
-            // Use 'auto' as default since not all models support 'detailed'
+            // Auto-enable reasoning summary for reasoning models (default: detailed)
             reasoningSummary:
-                (reasoningSummary as "auto" | "detailed") || "auto",
+                (reasoningSummary as "none" | "brief" | "detailed") ||
+                "detailed",
         }

         // Optionally configure reasoning effort
@@ -153,7 +152,8 @@ function buildProviderOptions(
         }
         if (reasoningSummary) {
             options.openai.reasoningSummary = reasoningSummary as
-                | "auto"
+                | "none"
+                | "brief"
                 | "detailed"
         }
     }
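
Put together, for an OpenAI reasoning model with no env overrides the hunks above now produce an options object like the following. This is a sketch: the model id is illustrative, and how the object is threaded into the AI SDK call is not shown in this diff:

```ts
// Assumed shape of the accumulator used by buildProviderOptions.
const options: { openai?: Record<string, unknown> } = {}
const modelId = "gpt-5" // illustrative model id

if (
    modelId.includes("o1") ||
    modelId.includes("o3") ||
    modelId.includes("gpt-5")
) {
    options.openai = {
        // New default: "detailed" instead of "auto".
        reasoningSummary:
            (process.env.OPENAI_REASONING_SUMMARY as
                | "none"
                | "brief"
                | "detailed") || "detailed",
    }
}
// => { openai: { reasoningSummary: "detailed" } }
```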
@@ -593,9 +593,7 @@ export function getAIModel(overrides?: ClientOverrides): ModelConfig {
             apiKey,
             ...(baseURL && { baseURL }),
         })
-        // Use Responses API (default) instead of .chat() to support reasoning
-        // for gpt-5, o1, o3, o4 models. Chat Completions API does not emit reasoning events.
-        model = customOpenAI(modelId)
+        model = customOpenAI.chat(modelId)
     } else {
         model = openai(modelId)
     }
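
The removed comment explains the trade-off in this last hunk: with the AI SDK's OpenAI provider, calling the provider instance directly selects the Responses API, which can emit reasoning events for gpt-5/o1/o3 models, while .chat() selects the Chat Completions API, which does not. A minimal sketch of the two call styles (model id illustrative):

```ts
import { createOpenAI } from "@ai-sdk/openai"

const customOpenAI = createOpenAI({
    apiKey: process.env.OPENAI_API_KEY,
})

// Old branch: Responses API (provider called directly) - emits reasoning events.
const responsesModel = customOpenAI("gpt-5")

// New branch: Chat Completions API via .chat() - no reasoning events.
const chatModel = customOpenAI.chat("gpt-5")
```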