Mirror of https://github.com/DayuanJiang/next-ai-draw-io.git (synced 2026-01-02 14:22:28 +08:00)
feat: add daily token limit with actual usage tracking (#171)
* feat: add daily token limit with actual usage tracking
  - Add DAILY_TOKEN_LIMIT env var for configurable daily token limit
  - Track actual tokens from Bedrock API response metadata (not estimates)
  - Server sends inputTokens + cachedInputTokens + outputTokens via messageMetadata
  - Client increments token count in onFinish callback with actual usage
  - Add NaN guards to prevent corrupted localStorage values
  - Add token limit toast notification with quota display
  - Remove client-side token estimation (was blocking legitimate requests)
  - Switch to js-tiktoken for client compatibility (pure JS, no WASM)

* feat: add TPM (tokens per minute) rate limiting
  - Add 50k tokens/min client-side rate limit
  - Track tokens per minute with automatic minute rollover
  - Check TPM limit after daily limits pass
  - Show toast when rate limit reached
  - NaN guards for localStorage values

* feat: make TPM limit configurable via TPM_LIMIT env var

* chore: restore cache debug logs

* fix: prevent race condition in TPM tracking
  checkTPMLimit was resetting the TPM count to 0 when checking, which overwrote the count saved by incrementTPMCount. Now checkTPMLimit only reads, and incrementTPMCount handles all writes (sketched below).

* chore: improve TPM limit error message clarity
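As context for the race-condition fix: a minimal sketch of the read-only check / single-writer split it describes. The function names come from the commit message; the localStorage key names, the rollover details, and the 50k default are assumptions for illustration, not code from this repository.

```typescript
// Hypothetical client-side TPM tracking, per the commit message's description.
const TPM_LIMIT = 50_000            // default mentioned in the commit message
const TPM_COUNT_KEY = "tpmCount"    // assumed key name
const TPM_MINUTE_KEY = "tpmMinute"  // assumed key name

function currentMinute(): number {
    return Math.floor(Date.now() / 60_000)
}

// Read-only: never writes, so it cannot clobber a count that
// incrementTPMCount just saved (the race the fix addresses).
export function checkTPMLimit(): boolean {
    const minute = parseInt(localStorage.getItem(TPM_MINUTE_KEY) ?? "0", 10)
    if (minute !== currentMinute()) return true // window rolled over: fresh quota
    let count = parseInt(localStorage.getItem(TPM_COUNT_KEY) ?? "0", 10)
    if (Number.isNaN(count)) count = 0 // NaN guard for corrupted localStorage
    return count < TPM_LIMIT
}

// Single writer: handles the minute rollover and all localStorage writes.
export function incrementTPMCount(tokens: number): void {
    const minute = currentMinute()
    const stored = parseInt(localStorage.getItem(TPM_MINUTE_KEY) ?? "0", 10)
    let count = parseInt(localStorage.getItem(TPM_COUNT_KEY) ?? "0", 10)
    if (Number.isNaN(count) || stored !== minute) count = 0 // rollover or corruption
    localStorage.setItem(TPM_MINUTE_KEY, String(minute))
    localStorage.setItem(TPM_COUNT_KEY, String(count + tokens))
}
```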
@@ -189,32 +189,11 @@ async function handleChatRequest(req: Request): Promise<Response> {
        const textPart = lastMessage.parts?.find((p: any) => p.type === "text")
        const filePart = lastMessage.parts?.find((p: any) => p.type === "file")

        console.log("[Cache DEBUG] textPart?.text:", textPart?.text)
        console.log("[Cache DEBUG] hasFilePart:", !!filePart)

        const cached = findCachedResponse(textPart?.text || "", !!filePart)

        console.log("[Cache DEBUG] cached found:", !!cached)

        if (cached) {
            console.log(
                "[Cache] Returning cached response for:",
                textPart?.text,
            )
            return createCachedStreamResponse(cached.xml)
        } else {
            console.log("[Cache DEBUG] No cache match - checking why...")
            console.log(
                "[Cache DEBUG] Exact promptText:",
                JSON.stringify(textPart?.text),
            )
        }
    } else {
        console.log("[Cache DEBUG] Skipping cache check - conditions not met")
        if (!isFirstMessage)
            console.log("[Cache DEBUG] Reason: not first message")
        if (!isEmptyDiagram)
            console.log("[Cache DEBUG] Reason: diagram not empty")
    }
    // === CACHE CHECK END ===
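The debug output suggests cache hits require an exact prompt-text match plus matching file-part presence. Purely as an illustration of that contract — the real findCachedResponse and its entry type are not shown in this diff:

```typescript
// Illustrative only: exact-match cache keyed on prompt text + file presence.
interface CachedEntry {
    xml: string // pre-generated diagram XML, streamed via createCachedStreamResponse
}

const responseCache = new Map<string, CachedEntry>() // assumed storage

export function findCachedResponse(
    promptText: string,
    hasFile: boolean,
): CachedEntry | undefined {
    // The "[Cache DEBUG] Exact promptText" log implies an exact string match.
    return responseCache.get(`${hasFile}:${promptText}`)
}
```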
@@ -243,28 +222,6 @@ ${lastMessageText}
    // Convert UIMessages to ModelMessages and add system message
    const modelMessages = convertToModelMessages(messages)

    // Debug: log raw messages to see what's coming in
    console.log(
        "[DEBUG] Raw UI messages:",
        JSON.stringify(
            messages.map((m: any, i: number) => ({
                index: i,
                role: m.role,
                partsCount: m.parts?.length,
                parts: m.parts?.map((p: any) => ({
                    type: p.type,
                    toolName: p.toolName,
                    toolCallId: p.toolCallId,
                    state: p.state,
                    inputType: p.input ? typeof p.input : undefined,
                    input: p.input,
                })),
            })),
            null,
            2,
        ),
    )

    // Fix tool call inputs for Bedrock API (requires JSON objects, not strings)
    const fixedMessages = fixToolCallInputs(modelMessages)
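The comment above notes that the Bedrock API rejects tool-call inputs that arrive as strings. A hedged sketch of what a fixToolCallInputs pass could look like — the actual implementation lives elsewhere in the repository:

```typescript
// Illustrative sketch: parse stringified tool-call inputs into JSON objects,
// since Bedrock requires objects (not strings) on tool-call parts.
export function fixToolCallInputs(messages: any[]): any[] {
    return messages.map((m) => {
        if (m.role !== "assistant" || !Array.isArray(m.content)) return m
        return {
            ...m,
            content: m.content.map((part: any) => {
                if (part.type === "tool-call" && typeof part.input === "string") {
                    try {
                        return { ...part, input: JSON.parse(part.input) }
                    } catch {
                        return part // leave unparseable input untouched
                    }
                }
                return part
            }),
        }
    })
}
```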
@@ -383,14 +340,8 @@ ${lastMessageText}
            }
            return null
        },
        onFinish: ({ text, usage, providerMetadata }) => {
            console.log(
                "[Cache] Full providerMetadata:",
                JSON.stringify(providerMetadata, null, 2),
            )
            console.log("[Cache] Usage:", JSON.stringify(usage, null, 2))
        onFinish: ({ text, usage }) => {
            // Pass usage to Langfuse (Bedrock streaming doesn't auto-report tokens to telemetry)
            // AI SDK uses inputTokens/outputTokens, Langfuse expects promptTokens/completionTokens
            setTraceOutput(text, {
                promptTokens: usage?.inputTokens,
                completionTokens: usage?.outputTokens,
@@ -476,7 +427,28 @@ IMPORTANT: Keep edits concise:
            }),
        })

        return result.toUIMessageStreamResponse()
        return result.toUIMessageStreamResponse({
            messageMetadata: ({ part }) => {
                if (part.type === "finish") {
                    const usage = (part as any).totalUsage
                    if (!usage) {
                        console.warn(
                            "[messageMetadata] No usage data in finish part",
                        )
                        return undefined
                    }
                    // Total input = non-cached + cached (these are separate counts)
                    // Note: cacheWriteInputTokens is not available on finish part
                    const totalInputTokens =
                        (usage.inputTokens ?? 0) + (usage.cachedInputTokens ?? 0)
                    return {
                        inputTokens: totalInputTokens,
                        outputTokens: usage.outputTokens ?? 0,
                    }
                }
                return undefined
            },
        })
    }

    // Wrap handler with error handling
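Per the commit message, the client consumes this metadata in its onFinish callback and adds the actual usage to its daily count. A minimal sketch of that consumer, assuming a localStorage-backed counter (the key name is hypothetical):

```typescript
// Hypothetical onFinish handler, consuming the metadata emitted
// by the server's messageMetadata callback above.
const DAILY_TOKEN_KEY = "dailyTokenCount" // assumed key name

function onFinish({ message }: { message: { metadata?: unknown } }) {
    const meta = message.metadata as
        | { inputTokens?: number; outputTokens?: number }
        | undefined
    if (!meta) return
    const used = (meta.inputTokens ?? 0) + (meta.outputTokens ?? 0)
    let count = parseInt(localStorage.getItem(DAILY_TOKEN_KEY) ?? "0", 10)
    if (Number.isNaN(count)) count = 0 // NaN guard, as in the commit message
    localStorage.setItem(DAILY_TOKEN_KEY, String(count + used))
}
```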
@@ -9,5 +9,7 @@ export async function GET() {
    return NextResponse.json({
        accessCodeRequired: accessCodes.length > 0,
        dailyRequestLimit: parseInt(process.env.DAILY_REQUEST_LIMIT || "0", 10),
        dailyTokenLimit: parseInt(process.env.DAILY_TOKEN_LIMIT || "0", 10),
        tpmLimit: parseInt(process.env.TPM_LIMIT || "0", 10),
    })
}
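These limits are served to the client; a sketch of reading them, assuming the route is exposed at /api/config (the file path is not visible in this diff) and that 0 — the `|| "0"` fallback — means a limit is not configured:

```typescript
// Assumed endpoint path; adjust to wherever this GET handler is mounted.
async function loadLimits() {
    const config = await fetch("/api/config").then((r) => r.json())
    if (config.tpmLimit > 0) {
        // run the client-side TPM check before sending a request
    }
    return config
}
```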