Compare commits

..

8 Commits

Author SHA1 Message Date
dayuan.jiang
29121f5e78 fix: use totalUsage with all token types for accurate quota tracking
The onFinish callback's 'usage' only contains the final step's tokens,
which underreports usage for multi-step tool calls (like diagram generation).
Changed to 'totalUsage' which provides cumulative counts across all steps.

Include all 4 token types for accurate counting:
1. inputTokens - non-cached input tokens
2. outputTokens - generated output tokens
3. cachedInputTokens - tokens read from prompt cache
4. inputTokenDetails.cacheWriteTokens - tokens written to cache

Tested locally:
- Request 1 (cache write): 334 + 62 + 0 + 6671 = 7,067 tokens
- Request 2 (cache read): 334 + 184 + 6551 + 120 = 7,189 tokens
- DynamoDB total: 14,256 ✓
2025-12-23 20:16:24 +09:00
Dayuan Jiang
7de192e1fa fix: enable progressive diagram rendering during streaming (#380)
- Add extractCompleteMxCells() to extract only complete mxCell elements from partial XML
- Remove useEffect cleanup that was killing debounce timeouts on every re-render
- Wrap XML in <root> tags for proper DOMParser validation

Previously, diagrams only rendered after ALL XML finished streaming because:
1. useEffect cleanup cleared the 150ms debounce timeout on every message change
2. DOMParser rejected partial XML like '<mxCell id="2" value="...' (incomplete)

Now each complete mxCell renders progressively as it finishes streaming.
2025-12-23 18:54:03 +09:00
Dayuan Jiang
97ae9395cd feat: add server-side quota tracking with DynamoDB (#379)
- Add dynamo-quota-manager.ts for atomic quota checks using ConditionExpression
- Enforce daily request limit, daily token limit, and TPM limit
- Return 429 with quota details (type, used, limit) when exceeded
- Quota is opt-in: only enabled when DYNAMODB_QUOTA_TABLE env var is set
- Remove client-side quota enforcement (server is now source of truth)
- Simplify use-quota-manager.tsx to only display toasts
- Add @aws-sdk/client-dynamodb dependency
2025-12-23 18:36:27 +09:00
Dayuan Jiang
5ec05eb100 refactor: simplify Langfuse integration with AI SDK 6 (#375)
- Remove manual token attribute setting (AI SDK 6 telemetry auto-reports)
- Use totalTokens directly instead of inputTokens + outputTokens calculation
- Fix sessionId bug in log-save/log-feedback (prevents wrong trace attachment)
- Hash IP addresses for privacy instead of storing raw IPs
- Fix isLangfuseEnabled() to check both keys for consistency
2025-12-23 16:26:45 +09:00
Dayuan Jiang
9aec7eda79 fix: add continuation retry limit for truncated diagrams (#372)
Previously, continuation mode (for truncated XML) had unlimited client-side
retries, relying only on server stepCountIs(5) limit. This could cause
excessive API calls (495 observed) when XML truncation kept occurring.

Added MAX_CONTINUATION_RETRY_COUNT=2 to limit continuation attempts:
- After 2 failed continuation attempts, shows error toast and stops
- Resets on successful completion or user-initiated message
- Also resets when quota limits are hit
2025-12-23 14:17:06 +09:00
Dayuan Jiang
a0fbc0ad33 fix: use last user message for Langfuse trace input (#371)
In multi-step tool flows, messages array contains assistant messages
from previous steps. Using messages[messages.length - 1] would record
the assistant's response as trace input instead of the user's question.
2025-12-23 13:43:28 +09:00
Dayuan Jiang
0385c45a10 fix: OpenAI reasoning/thinking blocks not showing (#370)
- Use Responses API instead of Chat Completions API for OpenAI
  (.chat() -> default call) to support reasoning events
- Add o4 to reasoning model detection
- Change default reasoningSummary from 'detailed' to 'auto'
  (not all models support 'detailed')
- Update types to match AI SDK: 'auto' | 'detailed'
2025-12-23 13:38:50 +09:00
Dayuan Jiang
5262b7bfb2 chore: upgrade AI SDK to v6.0.1 (#369)
- Upgrade ai package from ^5.0.89 to ^6.0.1
- Upgrade @ai-sdk/* provider packages to latest v3/v4
- Update convertToModelMessages call to async (new API)
- Fix usage.cachedInputTokens to usage.inputTokenDetails?.cacheReadTokens
2025-12-23 13:31:42 +09:00
11 changed files with 1416 additions and 763 deletions

View File

@@ -14,6 +14,11 @@ import path from "path"
import { z } from "zod" import { z } from "zod"
import { getAIModel, supportsPromptCaching } from "@/lib/ai-providers" import { getAIModel, supportsPromptCaching } from "@/lib/ai-providers"
import { findCachedResponse } from "@/lib/cached-responses" import { findCachedResponse } from "@/lib/cached-responses"
import {
checkAndIncrementRequest,
isQuotaEnabled,
recordTokenUsage,
} from "@/lib/dynamo-quota-manager"
import { import {
getTelemetryConfig, getTelemetryConfig,
setTraceInput, setTraceInput,
@@ -162,9 +167,13 @@ async function handleChatRequest(req: Request): Promise<Response> {
const { messages, xml, previousXml, sessionId } = await req.json() const { messages, xml, previousXml, sessionId } = await req.json()
// Get user IP for Langfuse tracking // Get user IP for Langfuse tracking (hashed for privacy)
const forwardedFor = req.headers.get("x-forwarded-for") const forwardedFor = req.headers.get("x-forwarded-for")
const userId = forwardedFor?.split(",")[0]?.trim() || "anonymous" const rawIp = forwardedFor?.split(",")[0]?.trim() || "anonymous"
const userId =
rawIp === "anonymous"
? rawIp
: `user-${Buffer.from(rawIp).toString("base64url").slice(0, 8)}`
// Validate sessionId for Langfuse (must be string, max 200 chars) // Validate sessionId for Langfuse (must be string, max 200 chars)
const validSessionId = const validSessionId =
@@ -173,9 +182,12 @@ async function handleChatRequest(req: Request): Promise<Response> {
: undefined : undefined
// Extract user input text for Langfuse trace // Extract user input text for Langfuse trace
const lastMessage = messages[messages.length - 1] // Find the last USER message, not just the last message (which could be assistant in multi-step tool flows)
const lastUserMessage = [...messages]
.reverse()
.find((m: any) => m.role === "user")
const userInputText = const userInputText =
lastMessage?.parts?.find((p: any) => p.type === "text")?.text || "" lastUserMessage?.parts?.find((p: any) => p.type === "text")?.text || ""
// Update Langfuse trace with input, session, and user // Update Langfuse trace with input, session, and user
setTraceInput({ setTraceInput({
@@ -184,6 +196,33 @@ async function handleChatRequest(req: Request): Promise<Response> {
userId: userId, userId: userId,
}) })
// === SERVER-SIDE QUOTA CHECK START ===
// Quota is opt-in: only enabled when DYNAMODB_QUOTA_TABLE env var is set
const hasOwnApiKey = !!(
req.headers.get("x-ai-provider") && req.headers.get("x-ai-api-key")
)
// Skip quota check if: quota disabled, user has own API key, or is anonymous
if (isQuotaEnabled() && !hasOwnApiKey && userId !== "anonymous") {
const quotaCheck = await checkAndIncrementRequest(userId, {
requests: Number(process.env.DAILY_REQUEST_LIMIT) || 10,
tokens: Number(process.env.DAILY_TOKEN_LIMIT) || 200000,
tpm: Number(process.env.TPM_LIMIT) || 20000,
})
if (!quotaCheck.allowed) {
return Response.json(
{
error: quotaCheck.error,
type: quotaCheck.type,
used: quotaCheck.used,
limit: quotaCheck.limit,
},
{ status: 429 },
)
}
}
// === SERVER-SIDE QUOTA CHECK END ===
// === FILE VALIDATION START === // === FILE VALIDATION START ===
const fileValidation = validateFileParts(messages) const fileValidation = validateFileParts(messages)
if (!fileValidation.valid) { if (!fileValidation.valid) {
@@ -237,9 +276,10 @@ async function handleChatRequest(req: Request): Promise<Response> {
// Get the appropriate system prompt based on model (extended for Opus/Haiku 4.5) // Get the appropriate system prompt based on model (extended for Opus/Haiku 4.5)
const systemMessage = getSystemPrompt(modelId, minimalStyle) const systemMessage = getSystemPrompt(modelId, minimalStyle)
// Extract file parts (images) from the last message // Extract file parts (images) from the last user message
const fileParts = const fileParts =
lastMessage.parts?.filter((part: any) => part.type === "file") || [] lastUserMessage?.parts?.filter((part: any) => part.type === "file") ||
[]
// User input only - XML is now in a separate cached system message // User input only - XML is now in a separate cached system message
const formattedUserInput = `User input: const formattedUserInput = `User input:
@@ -248,7 +288,7 @@ ${userInputText}
"""` """`
// Convert UIMessages to ModelMessages and add system message // Convert UIMessages to ModelMessages and add system message
const modelMessages = convertToModelMessages(messages) const modelMessages = await convertToModelMessages(messages)
// DEBUG: Log incoming messages structure // DEBUG: Log incoming messages structure
console.log("[route.ts] Incoming messages count:", messages.length) console.log("[route.ts] Incoming messages count:", messages.length)
@@ -502,12 +542,26 @@ ${userInputText}
userId, userId,
}), }),
}), }),
onFinish: ({ text, usage }) => { onFinish: ({ text, totalUsage }) => {
// Pass usage to Langfuse (Bedrock streaming doesn't auto-report tokens to telemetry) // AI SDK 6 telemetry auto-reports token usage on its spans
setTraceOutput(text, { setTraceOutput(text)
promptTokens: usage?.inputTokens,
completionTokens: usage?.outputTokens, // Record token usage for server-side quota tracking (if enabled)
}) // Use totalUsage (cumulative across all steps) instead of usage (final step only)
// Include all 4 token types: input, output, cache read, cache write
if (
isQuotaEnabled() &&
!hasOwnApiKey &&
userId !== "anonymous" &&
totalUsage
) {
const totalTokens =
(totalUsage.inputTokens || 0) +
(totalUsage.outputTokens || 0) +
(totalUsage.cachedInputTokens || 0) +
(totalUsage.inputTokenDetails?.cacheWriteTokens || 0)
recordTokenUsage(userId, totalTokens)
}
}, },
tools: { tools: {
// Client-side tool that will be executed on the client // Client-side tool that will be executed on the client
@@ -677,19 +731,9 @@ Call this tool to get shape names and usage syntax for a specific library.`,
messageMetadata: ({ part }) => { messageMetadata: ({ part }) => {
if (part.type === "finish") { if (part.type === "finish") {
const usage = (part as any).totalUsage const usage = (part as any).totalUsage
if (!usage) { // AI SDK 6 provides totalTokens directly
console.warn(
"[messageMetadata] No usage data in finish part",
)
return undefined
}
// Total input = non-cached + cached (these are separate counts)
// Note: cacheWriteInputTokens is not available on finish part
const totalInputTokens =
(usage.inputTokens ?? 0) + (usage.cachedInputTokens ?? 0)
return { return {
inputTokens: totalInputTokens, totalTokens: usage?.totalTokens ?? 0,
outputTokens: usage.outputTokens ?? 0,
finishReason: (part as any).finishReason, finishReason: (part as any).finishReason,
} }
} }

View File

@@ -27,9 +27,18 @@ export async function POST(req: Request) {
const { messageId, feedback, sessionId } = data const { messageId, feedback, sessionId } = data
// Get user IP for tracking // Skip logging if no sessionId - prevents attaching to wrong user's trace
if (!sessionId) {
return Response.json({ success: true, logged: false })
}
// Get user IP for tracking (hashed for privacy)
const forwardedFor = req.headers.get("x-forwarded-for") const forwardedFor = req.headers.get("x-forwarded-for")
const userId = forwardedFor?.split(",")[0]?.trim() || "anonymous" const rawIp = forwardedFor?.split(",")[0]?.trim() || "anonymous"
const userId =
rawIp === "anonymous"
? rawIp
: `user-${Buffer.from(rawIp).toString("base64url").slice(0, 8)}`
try { try {
// Find the most recent chat trace for this session to attach the score to // Find the most recent chat trace for this session to attach the score to

View File

@@ -27,6 +27,11 @@ export async function POST(req: Request) {
const { filename, format, sessionId } = data const { filename, format, sessionId } = data
// Skip logging if no sessionId - prevents attaching to wrong user's trace
if (!sessionId) {
return Response.json({ success: true, logged: false })
}
try { try {
const timestamp = new Date().toISOString() const timestamp = new Date().toISOString()

View File

@@ -31,6 +31,7 @@ import { getApiEndpoint } from "@/lib/base-path"
import { import {
applyDiagramOperations, applyDiagramOperations,
convertToLegalXml, convertToLegalXml,
extractCompleteMxCells,
isMxCellXmlComplete, isMxCellXmlComplete,
replaceNodes, replaceNodes,
validateAndFixXml, validateAndFixXml,
@@ -315,12 +316,28 @@ export function ChatMessageDisplay({
const handleDisplayChart = useCallback( const handleDisplayChart = useCallback(
(xml: string, showToast = false) => { (xml: string, showToast = false) => {
const currentXml = xml || "" let currentXml = xml || ""
const startTime = performance.now()
// During streaming (showToast=false), extract only complete mxCell elements
// This allows progressive rendering even with partial/incomplete trailing XML
if (!showToast) {
const completeCells = extractCompleteMxCells(currentXml)
if (!completeCells) {
return
}
currentXml = completeCells
}
const convertedXml = convertToLegalXml(currentXml) const convertedXml = convertToLegalXml(currentXml)
if (convertedXml !== previousXML.current) { if (convertedXml !== previousXML.current) {
// Parse and validate XML BEFORE calling replaceNodes // Parse and validate XML BEFORE calling replaceNodes
const parser = new DOMParser() const parser = new DOMParser()
const testDoc = parser.parseFromString(convertedXml, "text/xml") // Wrap in root element for parsing multiple mxCell elements
const testDoc = parser.parseFromString(
`<root>${convertedXml}</root>`,
"text/xml",
)
const parseError = testDoc.querySelector("parsererror") const parseError = testDoc.querySelector("parsererror")
if (parseError) { if (parseError) {
@@ -347,7 +364,22 @@ export function ChatMessageDisplay({
`<mxfile><diagram name="Page-1" id="page-1"><mxGraphModel><root><mxCell id="0"/><mxCell id="1" parent="0"/></root></mxGraphModel></diagram></mxfile>` `<mxfile><diagram name="Page-1" id="page-1"><mxGraphModel><root><mxCell id="0"/><mxCell id="1" parent="0"/></root></mxGraphModel></diagram></mxfile>`
const replacedXML = replaceNodes(baseXML, convertedXml) const replacedXML = replaceNodes(baseXML, convertedXml)
// Validate and auto-fix the XML const xmlProcessTime = performance.now() - startTime
// During streaming (showToast=false), skip heavy validation for lower latency
// The quick DOM parse check above catches malformed XML
// Full validation runs on final output (showToast=true)
if (!showToast) {
previousXML.current = convertedXml
const loadStartTime = performance.now()
onDisplayChart(replacedXML, true)
console.log(
`[Streaming] XML processing: ${xmlProcessTime.toFixed(1)}ms, drawio load: ${(performance.now() - loadStartTime).toFixed(1)}ms`,
)
return
}
// Final output: run full validation and auto-fix
const validation = validateAndFixXml(replacedXML) const validation = validateAndFixXml(replacedXML)
if (validation.valid) { if (validation.valid) {
previousXML.current = convertedXml previousXML.current = convertedXml
@@ -360,18 +392,19 @@ export function ChatMessageDisplay({
) )
} }
// Skip validation in loadDiagram since we already validated above // Skip validation in loadDiagram since we already validated above
const loadStartTime = performance.now()
onDisplayChart(xmlToLoad, true) onDisplayChart(xmlToLoad, true)
console.log(
`[Final] XML processing: ${xmlProcessTime.toFixed(1)}ms, validation+load: ${(performance.now() - loadStartTime).toFixed(1)}ms`,
)
} else { } else {
console.error( console.error(
"[ChatMessageDisplay] XML validation failed:", "[ChatMessageDisplay] XML validation failed:",
validation.error, validation.error,
) )
// Only show toast if this is the final XML (not during streaming) toast.error(
if (showToast) { "Diagram validation failed. Please try regenerating.",
toast.error( )
"Diagram validation failed. Please try regenerating.",
)
}
} }
} catch (error) { } catch (error) {
console.error( console.error(
@@ -603,17 +636,10 @@ export function ChatMessageDisplay({
} }
}) })
// Cleanup: clear any pending debounce timeout on unmount // NOTE: Don't cleanup debounce timeouts here!
return () => { // The cleanup runs on every re-render (when messages changes),
if (debounceTimeoutRef.current) { // which would cancel the timeout before it fires.
clearTimeout(debounceTimeoutRef.current) // Let the timeouts complete naturally - they're harmless if component unmounts.
debounceTimeoutRef.current = null
}
if (editDebounceTimeoutRef.current) {
clearTimeout(editDebounceTimeoutRef.current)
editDebounceTimeoutRef.current = null
}
}
}, [messages, handleDisplayChart, chartXML]) }, [messages, handleDisplayChart, chartXML])
const renderToolPart = (part: ToolPartLike) => { const renderToolPart = (part: ToolPartLike) => {

View File

@@ -76,6 +76,7 @@ interface ChatPanelProps {
const TOOL_ERROR_STATE = "output-error" as const const TOOL_ERROR_STATE = "output-error" as const
const DEBUG = process.env.NODE_ENV === "development" const DEBUG = process.env.NODE_ENV === "development"
const MAX_AUTO_RETRY_COUNT = 1 const MAX_AUTO_RETRY_COUNT = 1
const MAX_CONTINUATION_RETRY_COUNT = 2 // Limit for truncation continuation retries
/** /**
* Check if auto-resubmit should happen based on tool errors. * Check if auto-resubmit should happen based on tool errors.
@@ -216,6 +217,8 @@ export default function ChatPanel({
// Ref to track consecutive auto-retry count (reset on user action) // Ref to track consecutive auto-retry count (reset on user action)
const autoRetryCountRef = useRef(0) const autoRetryCountRef = useRef(0)
// Ref to track continuation retry count (for truncation handling)
const continuationRetryCountRef = useRef(0)
// Ref to accumulate partial XML when output is truncated due to maxOutputTokens // Ref to accumulate partial XML when output is truncated due to maxOutputTokens
// When partialXmlRef.current.length > 0, we're in continuation mode // When partialXmlRef.current.length > 0, we're in continuation mode
@@ -553,6 +556,23 @@ Continue from EXACTLY where you stopped.`,
} }
}, },
onError: (error) => { onError: (error) => {
// Handle server-side quota limit (429 response)
if (error.message.includes("Daily request limit")) {
quotaManager.showQuotaLimitToast()
return
}
if (error.message.includes("Daily token limit")) {
quotaManager.showTokenLimitToast(dailyTokenLimit)
return
}
if (
error.message.includes("Rate limit exceeded") ||
error.message.includes("tokens per minute")
) {
quotaManager.showTPMLimitToast()
return
}
// Silence access code error in console since it's handled by UI // Silence access code error in console since it's handled by UI
if (!error.message.includes("Invalid or missing access code")) { if (!error.message.includes("Invalid or missing access code")) {
console.error("Chat error:", error) console.error("Chat error:", error)
@@ -629,22 +649,6 @@ Continue from EXACTLY where you stopped.`,
// DEBUG: Log finish reason to diagnose truncation // DEBUG: Log finish reason to diagnose truncation
console.log("[onFinish] finishReason:", metadata?.finishReason) console.log("[onFinish] finishReason:", metadata?.finishReason)
console.log("[onFinish] metadata:", metadata)
if (metadata) {
// Use Number.isFinite to guard against NaN (typeof NaN === 'number' is true)
const inputTokens = Number.isFinite(metadata.inputTokens)
? (metadata.inputTokens as number)
: 0
const outputTokens = Number.isFinite(metadata.outputTokens)
? (metadata.outputTokens as number)
: 0
const actualTokens = inputTokens + outputTokens
if (actualTokens > 0) {
quotaManager.incrementTokenCount(actualTokens)
quotaManager.incrementTPMCount(actualTokens)
}
}
}, },
sendAutomaticallyWhen: ({ messages }) => { sendAutomaticallyWhen: ({ messages }) => {
const isInContinuationMode = partialXmlRef.current.length > 0 const isInContinuationMode = partialXmlRef.current.length > 0
@@ -656,15 +660,25 @@ Continue from EXACTLY where you stopped.`,
if (!shouldRetry) { if (!shouldRetry) {
// No error, reset retry count and clear state // No error, reset retry count and clear state
autoRetryCountRef.current = 0 autoRetryCountRef.current = 0
continuationRetryCountRef.current = 0
partialXmlRef.current = "" partialXmlRef.current = ""
return false return false
} }
// Continuation mode: unlimited retries (truncation continuation, not real errors) // Continuation mode: limited retries for truncation handling
// Server limits to 5 steps via stepCountIs(5)
if (isInContinuationMode) { if (isInContinuationMode) {
// Don't count against retry limit for continuation if (
// Quota checks still apply below continuationRetryCountRef.current >=
MAX_CONTINUATION_RETRY_COUNT
) {
toast.error(
`Continuation retry limit reached (${MAX_CONTINUATION_RETRY_COUNT}). The diagram may be too complex.`,
)
continuationRetryCountRef.current = 0
partialXmlRef.current = ""
return false
}
continuationRetryCountRef.current++
} else { } else {
// Regular error: check retry count limit // Regular error: check retry count limit
if (autoRetryCountRef.current >= MAX_AUTO_RETRY_COUNT) { if (autoRetryCountRef.current >= MAX_AUTO_RETRY_COUNT) {
@@ -679,23 +693,6 @@ Continue from EXACTLY where you stopped.`,
autoRetryCountRef.current++ autoRetryCountRef.current++
} }
// Check quota limits before auto-retry
const tokenLimitCheck = quotaManager.checkTokenLimit()
if (!tokenLimitCheck.allowed) {
quotaManager.showTokenLimitToast(tokenLimitCheck.used)
autoRetryCountRef.current = 0
partialXmlRef.current = ""
return false
}
const tpmCheck = quotaManager.checkTPMLimit()
if (!tpmCheck.allowed) {
quotaManager.showTPMLimitToast()
autoRetryCountRef.current = 0
partialXmlRef.current = ""
return false
}
return true return true
}, },
}) })
@@ -912,9 +909,6 @@ Continue from EXACTLY where you stopped.`,
xmlSnapshotsRef.current.set(messageIndex, chartXml) xmlSnapshotsRef.current.set(messageIndex, chartXml)
saveXmlSnapshots() saveXmlSnapshots()
// Check all quota limits
if (!checkAllQuotaLimits()) return
sendChatMessage(parts, chartXml, previousXml, sessionId) sendChatMessage(parts, chartXml, previousXml, sessionId)
// Token count is tracked in onFinish with actual server usage // Token count is tracked in onFinish with actual server usage
@@ -992,30 +986,7 @@ Continue from EXACTLY where you stopped.`,
saveXmlSnapshots() saveXmlSnapshots()
} }
// Check all quota limits (daily requests, tokens, TPM) // Send chat message with headers
const checkAllQuotaLimits = (): boolean => {
const limitCheck = quotaManager.checkDailyLimit()
if (!limitCheck.allowed) {
quotaManager.showQuotaLimitToast()
return false
}
const tokenLimitCheck = quotaManager.checkTokenLimit()
if (!tokenLimitCheck.allowed) {
quotaManager.showTokenLimitToast(tokenLimitCheck.used)
return false
}
const tpmCheck = quotaManager.checkTPMLimit()
if (!tpmCheck.allowed) {
quotaManager.showTPMLimitToast()
return false
}
return true
}
// Send chat message with headers and increment quota
const sendChatMessage = ( const sendChatMessage = (
parts: any, parts: any,
xml: string, xml: string,
@@ -1024,6 +995,7 @@ Continue from EXACTLY where you stopped.`,
) => { ) => {
// Reset all retry/continuation state on user-initiated message // Reset all retry/continuation state on user-initiated message
autoRetryCountRef.current = 0 autoRetryCountRef.current = 0
continuationRetryCountRef.current = 0
partialXmlRef.current = "" partialXmlRef.current = ""
const config = getSelectedAIConfig() const config = getSelectedAIConfig()
@@ -1064,7 +1036,6 @@ Continue from EXACTLY where you stopped.`,
}, },
}, },
) )
quotaManager.incrementRequestCount()
} }
// Process files and append content to user text (handles PDF, text, and optionally images) // Process files and append content to user text (handles PDF, text, and optionally images)
@@ -1152,13 +1123,8 @@ Continue from EXACTLY where you stopped.`,
setMessages(newMessages) setMessages(newMessages)
}) })
// Check all quota limits
if (!checkAllQuotaLimits()) return
// Now send the message after state is guaranteed to be updated // Now send the message after state is guaranteed to be updated
sendChatMessage(userParts, savedXml, previousXml, sessionId) sendChatMessage(userParts, savedXml, previousXml, sessionId)
// Token count is tracked in onFinish with actual server usage
} }
const handleEditMessage = async (messageIndex: number, newText: string) => { const handleEditMessage = async (messageIndex: number, newText: string) => {
@@ -1200,12 +1166,8 @@ Continue from EXACTLY where you stopped.`,
setMessages(newMessages) setMessages(newMessages)
}) })
// Check all quota limits
if (!checkAllQuotaLimits()) return
// Now send the edited message after state is guaranteed to be updated // Now send the edited message after state is guaranteed to be updated
sendChatMessage(newParts, savedXml, previousXml, sessionId) sendChatMessage(newParts, savedXml, previousXml, sessionId)
// Token count is tracked in onFinish with actual server usage
} }
// Collapsed view (desktop only) // Collapsed view (desktop only)

238
lib/dynamo-quota-manager.ts Normal file
View File

@@ -0,0 +1,238 @@
import {
ConditionalCheckFailedException,
DynamoDBClient,
GetItemCommand,
UpdateItemCommand,
} from "@aws-sdk/client-dynamodb"
// Quota tracking is OPT-IN: only enabled if DYNAMODB_QUOTA_TABLE is explicitly set
// OSS users who don't need quota tracking can simply not set this env var
const TABLE = process.env.DYNAMODB_QUOTA_TABLE
const DYNAMODB_REGION = process.env.DYNAMODB_REGION || "ap-northeast-1"
// Only create client if quota is enabled
const client = TABLE ? new DynamoDBClient({ region: DYNAMODB_REGION }) : null
/**
* Check if server-side quota tracking is enabled.
* Quota is opt-in: only enabled when DYNAMODB_QUOTA_TABLE env var is set.
*/
export function isQuotaEnabled(): boolean {
return !!TABLE
}
interface QuotaLimits {
requests: number // Daily request limit
tokens: number // Daily token limit
tpm: number // Tokens per minute
}
interface QuotaCheckResult {
allowed: boolean
error?: string
type?: "request" | "token" | "tpm"
used?: number
limit?: number
}
/**
* Check all quotas and increment request count atomically.
* Uses ConditionExpression to prevent race conditions.
* Returns which limit was exceeded if any.
*/
export async function checkAndIncrementRequest(
ip: string,
limits: QuotaLimits,
): Promise<QuotaCheckResult> {
// Skip if quota tracking not enabled
if (!client || !TABLE) {
return { allowed: true }
}
const today = new Date().toISOString().split("T")[0]
const currentMinute = Math.floor(Date.now() / 60000).toString()
const ttl = Math.floor(Date.now() / 1000) + 7 * 24 * 60 * 60
try {
// Atomic check-and-increment with ConditionExpression
// This prevents race conditions by failing if limits are exceeded
await client.send(
new UpdateItemCommand({
TableName: TABLE,
Key: { PK: { S: `IP#${ip}` } },
// Reset counts if new day/minute, then increment request count
UpdateExpression: `
SET lastResetDate = :today,
dailyReqCount = if_not_exists(dailyReqCount, :zero) + :one,
dailyTokenCount = if_not_exists(dailyTokenCount, :zero),
lastMinute = :minute,
tpmCount = if_not_exists(tpmCount, :zero),
#ttl = :ttl
`,
// Atomic condition: only succeed if ALL limits pass
// Uses attribute_not_exists for new items, then checks limits for existing items
ConditionExpression: `
(attribute_not_exists(lastResetDate) OR lastResetDate < :today OR
((attribute_not_exists(dailyReqCount) OR dailyReqCount < :reqLimit) AND
(attribute_not_exists(dailyTokenCount) OR dailyTokenCount < :tokenLimit))) AND
(attribute_not_exists(lastMinute) OR lastMinute <> :minute OR
attribute_not_exists(tpmCount) OR tpmCount < :tpmLimit)
`,
ExpressionAttributeNames: { "#ttl": "ttl" },
ExpressionAttributeValues: {
":today": { S: today },
":zero": { N: "0" },
":one": { N: "1" },
":minute": { S: currentMinute },
":ttl": { N: String(ttl) },
":reqLimit": { N: String(limits.requests || 999999) },
":tokenLimit": { N: String(limits.tokens || 999999) },
":tpmLimit": { N: String(limits.tpm || 999999) },
},
}),
)
return { allowed: true }
} catch (e: any) {
// Condition failed - need to determine which limit was exceeded
if (e instanceof ConditionalCheckFailedException) {
// Get current counts to determine which limit was hit
try {
const getResult = await client.send(
new GetItemCommand({
TableName: TABLE,
Key: { PK: { S: `IP#${ip}` } },
}),
)
const item = getResult.Item
const storedDate = item?.lastResetDate?.S
const storedMinute = item?.lastMinute?.S
const isNewDay = !storedDate || storedDate < today
const dailyReqCount = isNewDay
? 0
: Number(item?.dailyReqCount?.N || 0)
const dailyTokenCount = isNewDay
? 0
: Number(item?.dailyTokenCount?.N || 0)
const tpmCount =
storedMinute !== currentMinute
? 0
: Number(item?.tpmCount?.N || 0)
// Determine which limit was exceeded
if (limits.requests > 0 && dailyReqCount >= limits.requests) {
return {
allowed: false,
type: "request",
error: "Daily request limit exceeded",
used: dailyReqCount,
limit: limits.requests,
}
}
if (limits.tokens > 0 && dailyTokenCount >= limits.tokens) {
return {
allowed: false,
type: "token",
error: "Daily token limit exceeded",
used: dailyTokenCount,
limit: limits.tokens,
}
}
if (limits.tpm > 0 && tpmCount >= limits.tpm) {
return {
allowed: false,
type: "tpm",
error: "Rate limit exceeded (tokens per minute)",
used: tpmCount,
limit: limits.tpm,
}
}
// Condition failed but no limit clearly exceeded - race condition edge case
// Fail safe by allowing (could be a reset race)
console.warn(
`[quota] Condition failed but no limit exceeded for IP prefix: ${ip.slice(0, 8)}...`,
)
return { allowed: true }
} catch (getError: any) {
console.error(
`[quota] Failed to get quota details after condition failure, IP prefix: ${ip.slice(0, 8)}..., error: ${getError.message}`,
)
return { allowed: true } // Fail open
}
}
// Other DynamoDB errors - fail open
console.error(
`[quota] DynamoDB error (fail-open), IP prefix: ${ip.slice(0, 8)}..., error: ${e.message}`,
)
return { allowed: true }
}
}
/**
* Record token usage after response completes.
* Uses atomic operations to update both daily token count and TPM count.
* Handles minute boundaries atomically to prevent race conditions.
*/
export async function recordTokenUsage(
ip: string,
tokens: number,
): Promise<void> {
// Skip if quota tracking not enabled
if (!client || !TABLE) return
if (!Number.isFinite(tokens) || tokens <= 0) return
const currentMinute = Math.floor(Date.now() / 60000).toString()
const ttl = Math.floor(Date.now() / 1000) + 7 * 24 * 60 * 60
try {
// Try to update assuming same minute (most common case)
// Uses condition to ensure we're in the same minute
await client.send(
new UpdateItemCommand({
TableName: TABLE,
Key: { PK: { S: `IP#${ip}` } },
UpdateExpression:
"SET #ttl = :ttl ADD dailyTokenCount :tokens, tpmCount :tokens",
ConditionExpression: "lastMinute = :minute",
ExpressionAttributeNames: { "#ttl": "ttl" },
ExpressionAttributeValues: {
":minute": { S: currentMinute },
":tokens": { N: String(tokens) },
":ttl": { N: String(ttl) },
},
}),
)
} catch (e: any) {
if (e instanceof ConditionalCheckFailedException) {
// Different minute - reset TPM count and set new minute
try {
await client.send(
new UpdateItemCommand({
TableName: TABLE,
Key: { PK: { S: `IP#${ip}` } },
UpdateExpression:
"SET lastMinute = :minute, tpmCount = :tokens, #ttl = :ttl ADD dailyTokenCount :tokens",
ExpressionAttributeNames: { "#ttl": "ttl" },
ExpressionAttributeValues: {
":minute": { S: currentMinute },
":tokens": { N: String(tokens) },
":ttl": { N: String(ttl) },
},
}),
)
} catch (retryError: any) {
console.error(
`[quota] Failed to record tokens (retry), IP prefix: ${ip.slice(0, 8)}..., tokens: ${tokens}, error: ${retryError.message}`,
)
}
} else {
console.error(
`[quota] Failed to record tokens, IP prefix: ${ip.slice(0, 8)}..., tokens: ${tokens}, error: ${e.message}`,
)
}
}
}

View File

@@ -21,9 +21,11 @@ export function getLangfuseClient(): LangfuseClient | null {
return langfuseClient return langfuseClient
} }
// Check if Langfuse is configured // Check if Langfuse is configured (both keys required)
export function isLangfuseEnabled(): boolean { export function isLangfuseEnabled(): boolean {
return !!process.env.LANGFUSE_PUBLIC_KEY return !!(
process.env.LANGFUSE_PUBLIC_KEY && process.env.LANGFUSE_SECRET_KEY
)
} }
// Update trace with input data at the start of request // Update trace with input data at the start of request
@@ -43,34 +45,16 @@ export function setTraceInput(params: {
} }
// Update trace with output and end the span // Update trace with output and end the span
export function setTraceOutput( // Note: AI SDK 6 telemetry automatically reports token usage on its spans,
output: string, // so we only need to set the output text and close our wrapper span
usage?: { promptTokens?: number; completionTokens?: number }, export function setTraceOutput(output: string) {
) {
if (!isLangfuseEnabled()) return if (!isLangfuseEnabled()) return
updateActiveTrace({ output }) updateActiveTrace({ output })
// End the observe() wrapper span (AI SDK creates its own child spans with usage)
const activeSpan = api.trace.getActiveSpan() const activeSpan = api.trace.getActiveSpan()
if (activeSpan) { if (activeSpan) {
// Manually set usage attributes since AI SDK Bedrock streaming doesn't provide them
if (usage?.promptTokens) {
activeSpan.setAttribute("ai.usage.promptTokens", usage.promptTokens)
activeSpan.setAttribute(
"gen_ai.usage.input_tokens",
usage.promptTokens,
)
}
if (usage?.completionTokens) {
activeSpan.setAttribute(
"ai.usage.completionTokens",
usage.completionTokens,
)
activeSpan.setAttribute(
"gen_ai.usage.output_tokens",
usage.completionTokens,
)
}
activeSpan.end() activeSpan.end()
} }
} }

View File

@@ -1,11 +1,10 @@
"use client" "use client"
import { useCallback, useMemo } from "react" import { useCallback } from "react"
import { toast } from "sonner" import { toast } from "sonner"
import { QuotaLimitToast } from "@/components/quota-limit-toast" import { QuotaLimitToast } from "@/components/quota-limit-toast"
import { useDictionary } from "@/hooks/use-dictionary" import { useDictionary } from "@/hooks/use-dictionary"
import { formatMessage } from "@/lib/i18n/utils" import { formatMessage } from "@/lib/i18n/utils"
import { STORAGE_KEYS } from "@/lib/storage"
export interface QuotaConfig { export interface QuotaConfig {
dailyRequestLimit: number dailyRequestLimit: number
@@ -13,134 +12,19 @@ export interface QuotaConfig {
tpmLimit: number tpmLimit: number
} }
export interface QuotaCheckResult {
allowed: boolean
remaining: number
used: number
}
/** /**
* Hook for managing request/token quotas and rate limiting. * Hook for displaying quota limit toasts.
* Handles three types of limits: * Server-side handles actual quota enforcement via DynamoDB.
* - Daily request limit * This hook only provides UI feedback when limits are exceeded.
* - Daily token limit
* - Tokens per minute (TPM) rate limit
*
* Users with their own API key bypass all limits.
*/ */
export function useQuotaManager(config: QuotaConfig): { export function useQuotaManager(config: QuotaConfig): {
hasOwnApiKey: () => boolean
checkDailyLimit: () => QuotaCheckResult
checkTokenLimit: () => QuotaCheckResult
checkTPMLimit: () => QuotaCheckResult
incrementRequestCount: () => void
incrementTokenCount: (tokens: number) => void
incrementTPMCount: (tokens: number) => void
showQuotaLimitToast: () => void showQuotaLimitToast: () => void
showTokenLimitToast: (used: number) => void showTokenLimitToast: (used: number) => void
showTPMLimitToast: () => void showTPMLimitToast: () => void
} { } {
const { dailyRequestLimit, dailyTokenLimit, tpmLimit } = config const { dailyRequestLimit, dailyTokenLimit, tpmLimit } = config
const dict = useDictionary() const dict = useDictionary()
// Check if user has their own API key configured (bypass limits)
const hasOwnApiKey = useCallback((): boolean => {
const provider = localStorage.getItem(STORAGE_KEYS.aiProvider)
const apiKey = localStorage.getItem(STORAGE_KEYS.aiApiKey)
return !!(provider && apiKey)
}, [])
// Generic helper: Parse count from localStorage with NaN guard
const parseStorageCount = (key: string): number => {
const count = parseInt(localStorage.getItem(key) || "0", 10)
return Number.isNaN(count) ? 0 : count
}
// Generic helper: Create quota checker factory
const createQuotaChecker = useCallback(
(
getTimeKey: () => string,
timeStorageKey: string,
countStorageKey: string,
limit: number,
) => {
return (): QuotaCheckResult => {
if (hasOwnApiKey())
return { allowed: true, remaining: -1, used: 0 }
if (limit <= 0) return { allowed: true, remaining: -1, used: 0 }
const currentTime = getTimeKey()
const storedTime = localStorage.getItem(timeStorageKey)
let count = parseStorageCount(countStorageKey)
if (storedTime !== currentTime) {
count = 0
localStorage.setItem(timeStorageKey, currentTime)
localStorage.setItem(countStorageKey, "0")
}
return {
allowed: count < limit,
remaining: limit - count,
used: count,
}
}
},
[hasOwnApiKey],
)
// Generic helper: Create quota incrementer factory
const createQuotaIncrementer = useCallback(
(
getTimeKey: () => string,
timeStorageKey: string,
countStorageKey: string,
validateInput: boolean = false,
) => {
return (tokens: number = 1): void => {
if (validateInput && (!Number.isFinite(tokens) || tokens <= 0))
return
const currentTime = getTimeKey()
const storedTime = localStorage.getItem(timeStorageKey)
let count = parseStorageCount(countStorageKey)
if (storedTime !== currentTime) {
count = 0
localStorage.setItem(timeStorageKey, currentTime)
}
localStorage.setItem(countStorageKey, String(count + tokens))
}
},
[],
)
// Check daily request limit
const checkDailyLimit = useMemo(
() =>
createQuotaChecker(
() => new Date().toDateString(),
STORAGE_KEYS.requestDate,
STORAGE_KEYS.requestCount,
dailyRequestLimit,
),
[createQuotaChecker, dailyRequestLimit],
)
// Increment request count
const incrementRequestCount = useMemo(
() =>
createQuotaIncrementer(
() => new Date().toDateString(),
STORAGE_KEYS.requestDate,
STORAGE_KEYS.requestCount,
false,
),
[createQuotaIncrementer],
)
// Show quota limit toast (request-based) // Show quota limit toast (request-based)
const showQuotaLimitToast = useCallback(() => { const showQuotaLimitToast = useCallback(() => {
toast.custom( toast.custom(
@@ -155,30 +39,6 @@ export function useQuotaManager(config: QuotaConfig): {
) )
}, [dailyRequestLimit]) }, [dailyRequestLimit])
// Check daily token limit
const checkTokenLimit = useMemo(
() =>
createQuotaChecker(
() => new Date().toDateString(),
STORAGE_KEYS.tokenDate,
STORAGE_KEYS.tokenCount,
dailyTokenLimit,
),
[createQuotaChecker, dailyTokenLimit],
)
// Increment token count
const incrementTokenCount = useMemo(
() =>
createQuotaIncrementer(
() => new Date().toDateString(),
STORAGE_KEYS.tokenDate,
STORAGE_KEYS.tokenCount,
true, // Validate input tokens
),
[createQuotaIncrementer],
)
// Show token limit toast // Show token limit toast
const showTokenLimitToast = useCallback( const showTokenLimitToast = useCallback(
(used: number) => { (used: number) => {
@@ -197,30 +57,6 @@ export function useQuotaManager(config: QuotaConfig): {
[dailyTokenLimit], [dailyTokenLimit],
) )
// Check TPM (tokens per minute) limit
const checkTPMLimit = useMemo(
() =>
createQuotaChecker(
() => Math.floor(Date.now() / 60000).toString(),
STORAGE_KEYS.tpmMinute,
STORAGE_KEYS.tpmCount,
tpmLimit,
),
[createQuotaChecker, tpmLimit],
)
// Increment TPM count
const incrementTPMCount = useMemo(
() =>
createQuotaIncrementer(
() => Math.floor(Date.now() / 60000).toString(),
STORAGE_KEYS.tpmMinute,
STORAGE_KEYS.tpmCount,
true, // Validate input tokens
),
[createQuotaIncrementer],
)
// Show TPM limit toast // Show TPM limit toast
const showTPMLimitToast = useCallback(() => { const showTPMLimitToast = useCallback(() => {
const limitDisplay = const limitDisplay =
@@ -233,18 +69,6 @@ export function useQuotaManager(config: QuotaConfig): {
}, [tpmLimit, dict]) }, [tpmLimit, dict])
return { return {
// Check functions
hasOwnApiKey,
checkDailyLimit,
checkTokenLimit,
checkTPMLimit,
// Increment functions
incrementRequestCount,
incrementTokenCount,
incrementTPMCount,
// Toast functions
showQuotaLimitToast, showQuotaLimitToast,
showTokenLimitToast, showTokenLimitToast,
showTPMLimitToast, showTPMLimitToast,

View File

@@ -61,6 +61,47 @@ export function isMxCellXmlComplete(xml: string | undefined | null): boolean {
return trimmed.endsWith("/>") || trimmed.endsWith("</mxCell>") return trimmed.endsWith("/>") || trimmed.endsWith("</mxCell>")
} }
/**
* Extract only complete mxCell elements from partial/streaming XML.
* This allows progressive rendering during streaming by ignoring incomplete trailing elements.
* @param xml - The partial XML string (may contain incomplete trailing mxCell)
* @returns XML string containing only complete mxCell elements
*/
export function extractCompleteMxCells(xml: string | undefined | null): string {
if (!xml) return ""
const completeCells: Array<{ index: number; text: string }> = []
// Match self-closing mxCell tags: <mxCell ... />
// Also match mxCell with nested mxGeometry: <mxCell ...>...<mxGeometry .../></mxCell>
const selfClosingPattern = /<mxCell\s+[^>]*\/>/g
const nestedPattern = /<mxCell\s+[^>]*>[\s\S]*?<\/mxCell>/g
// Find all self-closing mxCell elements
let match: RegExpExecArray | null
while ((match = selfClosingPattern.exec(xml)) !== null) {
completeCells.push({ index: match.index, text: match[0] })
}
// Find all mxCell elements with nested content (like mxGeometry)
while ((match = nestedPattern.exec(xml)) !== null) {
completeCells.push({ index: match.index, text: match[0] })
}
// Sort by position to maintain order
completeCells.sort((a, b) => a.index - b.index)
// Remove duplicates (a self-closing match might overlap with nested match)
const seen = new Set<number>()
const uniqueCells = completeCells.filter((cell) => {
if (seen.has(cell.index)) return false
seen.add(cell.index)
return true
})
return uniqueCells.map((c) => c.text).join("\n")
}
// ============================================================================ // ============================================================================
// XML Parsing Helpers // XML Parsing Helpers
// ============================================================================ // ============================================================================

1370
package-lock.json generated

File diff suppressed because it is too large Load Diff

View File

@@ -24,21 +24,22 @@
"dist:all": "npm run electron:build && npm run electron:prepare && npx electron-builder --mac --win --linux" "dist:all": "npm run electron:build && npm run electron:prepare && npx electron-builder --mac --win --linux"
}, },
"dependencies": { "dependencies": {
"@ai-sdk/amazon-bedrock": "^3.0.70", "@ai-sdk/amazon-bedrock": "^4.0.1",
"@ai-sdk/anthropic": "^2.0.44", "@ai-sdk/anthropic": "^3.0.0",
"@ai-sdk/azure": "^2.0.69", "@ai-sdk/azure": "^3.0.0",
"@ai-sdk/deepseek": "^1.0.30", "@ai-sdk/deepseek": "^2.0.0",
"@ai-sdk/gateway": "^2.0.21", "@ai-sdk/gateway": "^3.0.0",
"@ai-sdk/google": "^2.0.0", "@ai-sdk/google": "^3.0.0",
"@ai-sdk/openai": "^2.0.19", "@ai-sdk/openai": "^3.0.0",
"@ai-sdk/react": "^2.0.107", "@ai-sdk/react": "^3.0.1",
"@aws-sdk/client-dynamodb": "^3.957.0",
"@aws-sdk/credential-providers": "^3.943.0", "@aws-sdk/credential-providers": "^3.943.0",
"@formatjs/intl-localematcher": "^0.7.2", "@formatjs/intl-localematcher": "^0.7.2",
"@langfuse/client": "^4.4.9", "@langfuse/client": "^4.4.9",
"@langfuse/otel": "^4.4.4", "@langfuse/otel": "^4.4.4",
"@langfuse/tracing": "^4.4.9", "@langfuse/tracing": "^4.4.9",
"@next/third-parties": "^16.0.6", "@next/third-parties": "^16.0.6",
"@openrouter/ai-sdk-provider": "^1.2.3", "@openrouter/ai-sdk-provider": "^1.5.4",
"@opentelemetry/exporter-trace-otlp-http": "^0.208.0", "@opentelemetry/exporter-trace-otlp-http": "^0.208.0",
"@opentelemetry/sdk-trace-node": "^2.2.0", "@opentelemetry/sdk-trace-node": "^2.2.0",
"@radix-ui/react-alert-dialog": "^1.1.15", "@radix-ui/react-alert-dialog": "^1.1.15",
@@ -53,7 +54,7 @@
"@radix-ui/react-tooltip": "^1.1.8", "@radix-ui/react-tooltip": "^1.1.8",
"@radix-ui/react-use-controllable-state": "^1.2.2", "@radix-ui/react-use-controllable-state": "^1.2.2",
"@xmldom/xmldom": "^0.9.8", "@xmldom/xmldom": "^0.9.8",
"ai": "^5.0.89", "ai": "^6.0.1",
"base-64": "^1.0.0", "base-64": "^1.0.0",
"class-variance-authority": "^0.7.1", "class-variance-authority": "^0.7.1",
"clsx": "^2.1.1", "clsx": "^2.1.1",
@@ -111,5 +112,10 @@
"tailwindcss": "^4", "tailwindcss": "^4",
"typescript": "^5", "typescript": "^5",
"wait-on": "^9.0.3" "wait-on": "^9.0.3"
},
"overrides": {
"@openrouter/ai-sdk-provider": {
"ai": "^6.0.1"
}
} }
} }