mirror of
https://github.com/DayuanJiang/next-ai-draw-io.git
synced 2026-01-02 22:32:27 +08:00
feat: add server-side quota tracking with DynamoDB (#379)
- Add dynamo-quota-manager.ts for atomic quota checks using ConditionExpression
- Enforce daily request limit, daily token limit, and TPM limit
- Return 429 with quota details (type, used, limit) when exceeded
- Quota is opt-in: only enabled when DYNAMODB_QUOTA_TABLE env var is set
- Remove client-side quota enforcement (server is now source of truth)
- Simplify use-quota-manager.tsx to only display toasts
- Add @aws-sdk/client-dynamodb dependency
This commit is contained in:
@@ -14,6 +14,11 @@ import path from "path"
|
|||||||
import { z } from "zod"
|
import { z } from "zod"
|
||||||
import { getAIModel, supportsPromptCaching } from "@/lib/ai-providers"
|
import { getAIModel, supportsPromptCaching } from "@/lib/ai-providers"
|
||||||
import { findCachedResponse } from "@/lib/cached-responses"
|
import { findCachedResponse } from "@/lib/cached-responses"
|
||||||
|
import {
|
||||||
|
checkAndIncrementRequest,
|
||||||
|
isQuotaEnabled,
|
||||||
|
recordTokenUsage,
|
||||||
|
} from "@/lib/dynamo-quota-manager"
|
||||||
import {
|
import {
|
||||||
getTelemetryConfig,
|
getTelemetryConfig,
|
||||||
setTraceInput,
|
setTraceInput,
|
||||||
@@ -191,6 +196,33 @@ async function handleChatRequest(req: Request): Promise<Response> {
|
|||||||
userId: userId,
|
userId: userId,
|
||||||
})
|
})
|
||||||
|
|
||||||
|
// === SERVER-SIDE QUOTA CHECK START ===
|
||||||
|
// Quota is opt-in: only enabled when DYNAMODB_QUOTA_TABLE env var is set
|
||||||
|
const hasOwnApiKey = !!(
|
||||||
|
req.headers.get("x-ai-provider") && req.headers.get("x-ai-api-key")
|
||||||
|
)
|
||||||
|
|
||||||
|
// Skip quota check if: quota disabled, user has own API key, or is anonymous
|
||||||
|
if (isQuotaEnabled() && !hasOwnApiKey && userId !== "anonymous") {
|
||||||
|
const quotaCheck = await checkAndIncrementRequest(userId, {
|
||||||
|
requests: Number(process.env.DAILY_REQUEST_LIMIT) || 10,
|
||||||
|
tokens: Number(process.env.DAILY_TOKEN_LIMIT) || 200000,
|
||||||
|
tpm: Number(process.env.TPM_LIMIT) || 20000,
|
||||||
|
})
|
||||||
|
if (!quotaCheck.allowed) {
|
||||||
|
return Response.json(
|
||||||
|
{
|
||||||
|
error: quotaCheck.error,
|
||||||
|
type: quotaCheck.type,
|
||||||
|
used: quotaCheck.used,
|
||||||
|
limit: quotaCheck.limit,
|
||||||
|
},
|
||||||
|
{ status: 429 },
|
||||||
|
)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// === SERVER-SIDE QUOTA CHECK END ===
|
||||||
|
|
||||||
// === FILE VALIDATION START ===
|
// === FILE VALIDATION START ===
|
||||||
const fileValidation = validateFileParts(messages)
|
const fileValidation = validateFileParts(messages)
|
||||||
if (!fileValidation.valid) {
|
if (!fileValidation.valid) {
|
||||||
@@ -510,9 +542,21 @@ ${userInputText}
|
|||||||
userId,
|
userId,
|
||||||
}),
|
}),
|
||||||
}),
|
}),
|
||||||
onFinish: ({ text }) => {
|
onFinish: ({ text, usage }) => {
|
||||||
// AI SDK 6 telemetry auto-reports token usage on its spans
|
// AI SDK 6 telemetry auto-reports token usage on its spans
|
||||||
setTraceOutput(text)
|
setTraceOutput(text)
|
||||||
|
|
||||||
|
// Record token usage for server-side quota tracking (if enabled)
|
||||||
|
if (
|
||||||
|
isQuotaEnabled() &&
|
||||||
|
!hasOwnApiKey &&
|
||||||
|
userId !== "anonymous" &&
|
||||||
|
usage
|
||||||
|
) {
|
||||||
|
const totalTokens =
|
||||||
|
(usage.inputTokens || 0) + (usage.outputTokens || 0)
|
||||||
|
recordTokenUsage(userId, totalTokens)
|
||||||
|
}
|
||||||
},
|
},
|
||||||
tools: {
|
tools: {
|
||||||
// Client-side tool that will be executed on the client
|
// Client-side tool that will be executed on the client
|
||||||
|
|||||||
@@ -556,6 +556,23 @@ Continue from EXACTLY where you stopped.`,
|
|||||||
}
|
}
|
||||||
},
|
},
|
||||||
onError: (error) => {
|
onError: (error) => {
|
||||||
|
// Handle server-side quota limit (429 response)
|
||||||
|
if (error.message.includes("Daily request limit")) {
|
||||||
|
quotaManager.showQuotaLimitToast()
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if (error.message.includes("Daily token limit")) {
|
||||||
|
quotaManager.showTokenLimitToast(dailyTokenLimit)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if (
|
||||||
|
error.message.includes("Rate limit exceeded") ||
|
||||||
|
error.message.includes("tokens per minute")
|
||||||
|
) {
|
||||||
|
quotaManager.showTPMLimitToast()
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
// Silence access code error in console since it's handled by UI
|
// Silence access code error in console since it's handled by UI
|
||||||
if (!error.message.includes("Invalid or missing access code")) {
|
if (!error.message.includes("Invalid or missing access code")) {
|
||||||
console.error("Chat error:", error)
|
console.error("Chat error:", error)
|
||||||
@@ -632,16 +649,6 @@ Continue from EXACTLY where you stopped.`,
|
|||||||
|
|
||||||
// DEBUG: Log finish reason to diagnose truncation
|
// DEBUG: Log finish reason to diagnose truncation
|
||||||
console.log("[onFinish] finishReason:", metadata?.finishReason)
|
console.log("[onFinish] finishReason:", metadata?.finishReason)
|
||||||
|
|
||||||
// AI SDK 6 provides totalTokens directly
|
|
||||||
const totalTokens =
|
|
||||||
metadata && Number.isFinite(metadata.totalTokens)
|
|
||||||
? (metadata.totalTokens as number)
|
|
||||||
: 0
|
|
||||||
if (totalTokens > 0) {
|
|
||||||
quotaManager.incrementTokenCount(totalTokens)
|
|
||||||
quotaManager.incrementTPMCount(totalTokens)
|
|
||||||
}
|
|
||||||
},
|
},
|
||||||
sendAutomaticallyWhen: ({ messages }) => {
|
sendAutomaticallyWhen: ({ messages }) => {
|
||||||
const isInContinuationMode = partialXmlRef.current.length > 0
|
const isInContinuationMode = partialXmlRef.current.length > 0
|
||||||
@@ -686,25 +693,6 @@ Continue from EXACTLY where you stopped.`,
|
|||||||
autoRetryCountRef.current++
|
autoRetryCountRef.current++
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check quota limits before auto-retry
|
|
||||||
const tokenLimitCheck = quotaManager.checkTokenLimit()
|
|
||||||
if (!tokenLimitCheck.allowed) {
|
|
||||||
quotaManager.showTokenLimitToast(tokenLimitCheck.used)
|
|
||||||
autoRetryCountRef.current = 0
|
|
||||||
continuationRetryCountRef.current = 0
|
|
||||||
partialXmlRef.current = ""
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
|
|
||||||
const tpmCheck = quotaManager.checkTPMLimit()
|
|
||||||
if (!tpmCheck.allowed) {
|
|
||||||
quotaManager.showTPMLimitToast()
|
|
||||||
autoRetryCountRef.current = 0
|
|
||||||
continuationRetryCountRef.current = 0
|
|
||||||
partialXmlRef.current = ""
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
|
|
||||||
return true
|
return true
|
||||||
},
|
},
|
||||||
})
|
})
|
||||||
@@ -921,9 +909,6 @@ Continue from EXACTLY where you stopped.`,
|
|||||||
xmlSnapshotsRef.current.set(messageIndex, chartXml)
|
xmlSnapshotsRef.current.set(messageIndex, chartXml)
|
||||||
saveXmlSnapshots()
|
saveXmlSnapshots()
|
||||||
|
|
||||||
// Check all quota limits
|
|
||||||
if (!checkAllQuotaLimits()) return
|
|
||||||
|
|
||||||
sendChatMessage(parts, chartXml, previousXml, sessionId)
|
sendChatMessage(parts, chartXml, previousXml, sessionId)
|
||||||
|
|
||||||
// Token count is tracked in onFinish with actual server usage
|
// Token count is tracked in onFinish with actual server usage
|
||||||
@@ -1001,30 +986,7 @@ Continue from EXACTLY where you stopped.`,
|
|||||||
saveXmlSnapshots()
|
saveXmlSnapshots()
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check all quota limits (daily requests, tokens, TPM)
|
// Send chat message with headers
|
||||||
const checkAllQuotaLimits = (): boolean => {
|
|
||||||
const limitCheck = quotaManager.checkDailyLimit()
|
|
||||||
if (!limitCheck.allowed) {
|
|
||||||
quotaManager.showQuotaLimitToast()
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
|
|
||||||
const tokenLimitCheck = quotaManager.checkTokenLimit()
|
|
||||||
if (!tokenLimitCheck.allowed) {
|
|
||||||
quotaManager.showTokenLimitToast(tokenLimitCheck.used)
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
|
|
||||||
const tpmCheck = quotaManager.checkTPMLimit()
|
|
||||||
if (!tpmCheck.allowed) {
|
|
||||||
quotaManager.showTPMLimitToast()
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
|
|
||||||
// Send chat message with headers and increment quota
|
|
||||||
const sendChatMessage = (
|
const sendChatMessage = (
|
||||||
parts: any,
|
parts: any,
|
||||||
xml: string,
|
xml: string,
|
||||||
@@ -1074,7 +1036,6 @@ Continue from EXACTLY where you stopped.`,
|
|||||||
},
|
},
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
quotaManager.incrementRequestCount()
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Process files and append content to user text (handles PDF, text, and optionally images)
|
// Process files and append content to user text (handles PDF, text, and optionally images)
|
||||||
@@ -1162,13 +1123,8 @@ Continue from EXACTLY where you stopped.`,
|
|||||||
setMessages(newMessages)
|
setMessages(newMessages)
|
||||||
})
|
})
|
||||||
|
|
||||||
// Check all quota limits
|
|
||||||
if (!checkAllQuotaLimits()) return
|
|
||||||
|
|
||||||
// Now send the message after state is guaranteed to be updated
|
// Now send the message after state is guaranteed to be updated
|
||||||
sendChatMessage(userParts, savedXml, previousXml, sessionId)
|
sendChatMessage(userParts, savedXml, previousXml, sessionId)
|
||||||
|
|
||||||
// Token count is tracked in onFinish with actual server usage
|
|
||||||
}
|
}
|
||||||
|
|
||||||
const handleEditMessage = async (messageIndex: number, newText: string) => {
|
const handleEditMessage = async (messageIndex: number, newText: string) => {
|
||||||
@@ -1210,12 +1166,8 @@ Continue from EXACTLY where you stopped.`,
|
|||||||
setMessages(newMessages)
|
setMessages(newMessages)
|
||||||
})
|
})
|
||||||
|
|
||||||
// Check all quota limits
|
|
||||||
if (!checkAllQuotaLimits()) return
|
|
||||||
|
|
||||||
// Now send the edited message after state is guaranteed to be updated
|
// Now send the edited message after state is guaranteed to be updated
|
||||||
sendChatMessage(newParts, savedXml, previousXml, sessionId)
|
sendChatMessage(newParts, savedXml, previousXml, sessionId)
|
||||||
// Token count is tracked in onFinish with actual server usage
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Collapsed view (desktop only)
|
// Collapsed view (desktop only)
|
||||||
|
|||||||
238
lib/dynamo-quota-manager.ts
Normal file
238
lib/dynamo-quota-manager.ts
Normal file
@@ -0,0 +1,238 @@
|
|||||||
|
import {
|
||||||
|
ConditionalCheckFailedException,
|
||||||
|
DynamoDBClient,
|
||||||
|
GetItemCommand,
|
||||||
|
UpdateItemCommand,
|
||||||
|
} from "@aws-sdk/client-dynamodb"
|
||||||
|
|
||||||
|
// Server-side quota tracking is opt-in. It is switched on solely by setting
// DYNAMODB_QUOTA_TABLE; deployments that leave it unset never touch DynamoDB.
const { DYNAMODB_QUOTA_TABLE: TABLE } = process.env
const DYNAMODB_REGION = process.env.DYNAMODB_REGION || "ap-northeast-1"

// The client is only constructed when a table name is configured.
const client = TABLE ? new DynamoDBClient({ region: DYNAMODB_REGION }) : null
|
||||||
|
|
||||||
|
/**
 * Report whether server-side quota tracking is switched on.
 *
 * @returns `true` when a quota table name was supplied through the
 * DYNAMODB_QUOTA_TABLE environment variable, `false` otherwise.
 */
export function isQuotaEnabled(): boolean {
    return Boolean(TABLE)
}
|
||||||
|
|
||||||
|
/** Limits applied to a single caller by the quota check. */
interface QuotaLimits {
    /** Daily request limit */
    requests: number
    /** Daily token limit */
    tokens: number
    /** Tokens per minute */
    tpm: number
}
|
||||||
|
|
||||||
|
/** Outcome of a quota check. The optional fields describe a denial. */
interface QuotaCheckResult {
    /** Whether the request may proceed. */
    allowed: boolean
    /** Human-readable description of the limit that was hit. */
    error?: string
    /** Which of the three limits was hit. */
    type?: "request" | "token" | "tpm"
    /** Counter value observed for the limit that was hit. */
    used?: number
    /** Configured value of the limit that was hit. */
    limit?: number
}
|
||||||
|
|
||||||
|
/**
 * Check all quotas for a caller and, if none is exhausted, count this request.
 *
 * Each step is a single conditional DynamoDB write, so the limit check and the
 * counter change are applied together:
 *
 * 1. Same-day increment: adds one to `dailyReqCount` when the stored day is
 *    today and the request, daily-token and current-minute TPM counters are
 *    all below their limits.
 * 2. Day reset: if step 1's condition fails and the item is missing or was
 *    last reset on an earlier (UTC) day, every counter is restarted and this
 *    request is counted as the first of the new day.
 * 3. Diagnosis: if both conditions fail a limit was hit; the item is read back
 *    to report which one.
 *
 * The per-minute window (`lastMinute` / `tpmCount`) is only read here, apart
 * from the day reset. It is rolled forward by `recordTokenUsage`, so a count
 * from an older minute is never attributed to the current one.
 *
 * Fails open: any DynamoDB error, and a failed condition for which no limit
 * can be shown to be exceeded, results in `{ allowed: true }`.
 *
 * @param ip - Caller identifier; the item is stored under the key `IP#<ip>`.
 * @param limits - Limits to enforce. A non-positive or non-finite value
 * disables that particular limit.
 * @returns `{ allowed: true }` when the request may proceed, otherwise
 * `allowed: false` together with the limit `type`, an `error` message, the
 * `used` counter value and the configured `limit`.
 */
export async function checkAndIncrementRequest(
    ip: string,
    limits: QuotaLimits,
): Promise<QuotaCheckResult> {
    // Skip if quota tracking not enabled
    if (!client || !TABLE) {
        return { allowed: true }
    }

    // Derive every time-based value from one clock reading so they agree.
    const nowMs = Date.now()
    const today = new Date(nowMs).toISOString().slice(0, 10) // YYYY-MM-DD (UTC)
    const currentMinute = Math.floor(nowMs / 60000).toString()
    const ttl = Math.floor(nowMs / 1000) + 7 * 24 * 60 * 60 // now + 7 days, in seconds
    const key = { PK: { S: `IP#${ip}` } }
    const ipPrefix = ip.slice(0, 8)

    // A disabled limit is sent as a bound that a counter cannot realistically reach.
    const asBound = (limit: number): string =>
        String(
            Number.isFinite(limit) && limit > 0
                ? limit
                : Number.MAX_SAFE_INTEGER,
        )
    const describe = (err: unknown): string =>
        err instanceof Error ? err.message : String(err)

    try {
        // Step 1 (common case): same day, all limits still open.
        // A missing lastResetDate makes the comparison false, which routes
        // brand-new items to the reset step below.
        try {
            await client.send(
                new UpdateItemCommand({
                    TableName: TABLE,
                    Key: key,
                    UpdateExpression: "SET #ttl = :ttl ADD dailyReqCount :one",
                    ConditionExpression: `
                        lastResetDate >= :today AND
                        (attribute_not_exists(dailyReqCount) OR dailyReqCount < :reqLimit) AND
                        (attribute_not_exists(dailyTokenCount) OR dailyTokenCount < :tokenLimit) AND
                        (attribute_not_exists(lastMinute) OR lastMinute <> :minute OR
                         attribute_not_exists(tpmCount) OR tpmCount < :tpmLimit)
                    `,
                    ExpressionAttributeNames: { "#ttl": "ttl" },
                    ExpressionAttributeValues: {
                        ":today": { S: today },
                        ":one": { N: "1" },
                        ":minute": { S: currentMinute },
                        ":ttl": { N: String(ttl) },
                        ":reqLimit": { N: asBound(limits.requests) },
                        ":tokenLimit": { N: asBound(limits.tokens) },
                        ":tpmLimit": { N: asBound(limits.tpm) },
                    },
                }),
            )
            return { allowed: true }
        } catch (e: unknown) {
            if (!(e instanceof ConditionalCheckFailedException)) throw e
        }

        // Step 2: new item or first request of a new day. Restart all
        // counters instead of carrying yesterday's values forward.
        try {
            await client.send(
                new UpdateItemCommand({
                    TableName: TABLE,
                    Key: key,
                    UpdateExpression: `
                        SET lastResetDate = :today,
                            dailyReqCount = :one,
                            dailyTokenCount = :zero,
                            lastMinute = :minute,
                            tpmCount = :zero,
                            #ttl = :ttl
                    `,
                    ConditionExpression:
                        "attribute_not_exists(lastResetDate) OR lastResetDate < :today",
                    ExpressionAttributeNames: { "#ttl": "ttl" },
                    ExpressionAttributeValues: {
                        ":today": { S: today },
                        ":zero": { N: "0" },
                        ":one": { N: "1" },
                        ":minute": { S: currentMinute },
                        ":ttl": { N: String(ttl) },
                    },
                }),
            )
            return { allowed: true }
        } catch (e: unknown) {
            if (!(e instanceof ConditionalCheckFailedException)) throw e
        }
    } catch (e: unknown) {
        // Other DynamoDB errors - fail open
        console.error(
            `[quota] DynamoDB error (fail-open), IP prefix: ${ipPrefix}..., error: ${describe(e)}`,
        )
        return { allowed: true }
    }

    // Step 3: both conditions failed - determine which limit was exceeded.
    try {
        const getResult = await client.send(
            new GetItemCommand({
                TableName: TABLE,
                Key: key,
                // Read the latest committed state; a stale read could hide
                // the counter that just caused the condition to fail.
                ConsistentRead: true,
            }),
        )

        const item = getResult.Item
        const storedDate = item?.lastResetDate?.S
        const storedMinute = item?.lastMinute?.S
        const isNewDay = !storedDate || storedDate < today

        const dailyReqCount = isNewDay ? 0 : Number(item?.dailyReqCount?.N || 0)
        const dailyTokenCount = isNewDay
            ? 0
            : Number(item?.dailyTokenCount?.N || 0)
        const tpmCount =
            storedMinute !== currentMinute ? 0 : Number(item?.tpmCount?.N || 0)

        // The error strings below are matched by substring in the chat
        // client's onError handler; keep them unchanged.
        if (limits.requests > 0 && dailyReqCount >= limits.requests) {
            return {
                allowed: false,
                type: "request",
                error: "Daily request limit exceeded",
                used: dailyReqCount,
                limit: limits.requests,
            }
        }
        if (limits.tokens > 0 && dailyTokenCount >= limits.tokens) {
            return {
                allowed: false,
                type: "token",
                error: "Daily token limit exceeded",
                used: dailyTokenCount,
                limit: limits.tokens,
            }
        }
        if (limits.tpm > 0 && tpmCount >= limits.tpm) {
            return {
                allowed: false,
                type: "tpm",
                error: "Rate limit exceeded (tokens per minute)",
                used: tpmCount,
                limit: limits.tpm,
            }
        }

        // Condition failed but no limit clearly exceeded - race condition edge case
        // Fail safe by allowing (could be a reset race)
        console.warn(
            `[quota] Condition failed but no limit exceeded for IP prefix: ${ipPrefix}...`,
        )
        return { allowed: true }
    } catch (getError: unknown) {
        console.error(
            `[quota] Failed to get quota details after condition failure, IP prefix: ${ipPrefix}..., error: ${describe(getError)}`,
        )
        return { allowed: true } // Fail open
    }
}
|
||||||
|
|
||||||
|
/**
 * Record token usage after a response completes.
 *
 * `dailyTokenCount` is always increased with an atomic `ADD`. The per-minute
 * counter is handled with two conditional writes:
 *
 * - same minute (`lastMinute` equals the current minute): `ADD` to `tpmCount`;
 * - different or missing minute: start a new window by setting `lastMinute`
 *   and `tpmCount = tokens`.
 *
 * The window-opening write is itself conditional, so when two responses finish
 * at the start of the same new minute only one opens the window and the other
 * retries as a same-minute `ADD` instead of overwriting the first one's count.
 *
 * Never throws: failures are logged and swallowed so quota bookkeeping cannot
 * break the response that triggered it.
 *
 * @param ip - Caller identifier; the item is stored under the key `IP#<ip>`.
 * @param tokens - Tokens consumed by the request. Non-finite or non-positive
 * values are ignored.
 */
export async function recordTokenUsage(
    ip: string,
    tokens: number,
): Promise<void> {
    // Skip if quota tracking not enabled
    if (!client || !TABLE) return
    if (!Number.isFinite(tokens) || tokens <= 0) return

    const nowMs = Date.now()
    const currentMinute = Math.floor(nowMs / 60000).toString()
    const ttl = Math.floor(nowMs / 1000) + 7 * 24 * 60 * 60 // now + 7 days, in seconds

    // Both writes use the same key, names and values (each value is
    // referenced by both expressions, as DynamoDB rejects unused ones).
    const shared = {
        TableName: TABLE,
        Key: { PK: { S: `IP#${ip}` } },
        ExpressionAttributeNames: { "#ttl": "ttl" },
        ExpressionAttributeValues: {
            ":minute": { S: currentMinute },
            ":tokens": { N: String(tokens) },
            ":ttl": { N: String(ttl) },
        },
    }

    // Same minute: accumulate into the open window.
    const addToOpenWindow = (): UpdateItemCommand =>
        new UpdateItemCommand({
            ...shared,
            UpdateExpression:
                "SET #ttl = :ttl ADD dailyTokenCount :tokens, tpmCount :tokens",
            ConditionExpression: "lastMinute = :minute",
        })

    // Different minute: reset TPM count and set new minute, but only if
    // nobody else has already opened this minute's window.
    const openNewWindow = (): UpdateItemCommand =>
        new UpdateItemCommand({
            ...shared,
            UpdateExpression:
                "SET lastMinute = :minute, tpmCount = :tokens, #ttl = :ttl ADD dailyTokenCount :tokens",
            ConditionExpression:
                "attribute_not_exists(lastMinute) OR lastMinute <> :minute",
        })

    // add -> open -> add: the final add covers losing the race to open.
    const attempts = [addToOpenWindow, openNewWindow, addToOpenWindow]

    try {
        for (const buildCommand of attempts) {
            try {
                await client.send(buildCommand())
                return
            } catch (e: unknown) {
                // A failed condition just means "try the next strategy";
                // anything else is a real error.
                if (!(e instanceof ConditionalCheckFailedException)) throw e
            }
        }
        console.error(
            `[quota] Failed to record tokens (retry), IP prefix: ${ip.slice(0, 8)}..., tokens: ${tokens}, error: minute window changed on every attempt`,
        )
    } catch (e: unknown) {
        const message = e instanceof Error ? e.message : String(e)
        console.error(
            `[quota] Failed to record tokens, IP prefix: ${ip.slice(0, 8)}..., tokens: ${tokens}, error: ${message}`,
        )
    }
}
|
||||||
@@ -1,11 +1,10 @@
|
|||||||
"use client"
|
"use client"
|
||||||
|
|
||||||
import { useCallback, useMemo } from "react"
|
import { useCallback } from "react"
|
||||||
import { toast } from "sonner"
|
import { toast } from "sonner"
|
||||||
import { QuotaLimitToast } from "@/components/quota-limit-toast"
|
import { QuotaLimitToast } from "@/components/quota-limit-toast"
|
||||||
import { useDictionary } from "@/hooks/use-dictionary"
|
import { useDictionary } from "@/hooks/use-dictionary"
|
||||||
import { formatMessage } from "@/lib/i18n/utils"
|
import { formatMessage } from "@/lib/i18n/utils"
|
||||||
import { STORAGE_KEYS } from "@/lib/storage"
|
|
||||||
|
|
||||||
export interface QuotaConfig {
|
export interface QuotaConfig {
|
||||||
dailyRequestLimit: number
|
dailyRequestLimit: number
|
||||||
@@ -13,134 +12,19 @@ export interface QuotaConfig {
|
|||||||
tpmLimit: number
|
tpmLimit: number
|
||||||
}
|
}
|
||||||
|
|
||||||
export interface QuotaCheckResult {
|
|
||||||
allowed: boolean
|
|
||||||
remaining: number
|
|
||||||
used: number
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Hook for managing request/token quotas and rate limiting.
|
* Hook for displaying quota limit toasts.
|
||||||
* Handles three types of limits:
|
* Server-side handles actual quota enforcement via DynamoDB.
|
||||||
* - Daily request limit
|
* This hook only provides UI feedback when limits are exceeded.
|
||||||
* - Daily token limit
|
|
||||||
* - Tokens per minute (TPM) rate limit
|
|
||||||
*
|
|
||||||
* Users with their own API key bypass all limits.
|
|
||||||
*/
|
*/
|
||||||
export function useQuotaManager(config: QuotaConfig): {
|
export function useQuotaManager(config: QuotaConfig): {
|
||||||
hasOwnApiKey: () => boolean
|
|
||||||
checkDailyLimit: () => QuotaCheckResult
|
|
||||||
checkTokenLimit: () => QuotaCheckResult
|
|
||||||
checkTPMLimit: () => QuotaCheckResult
|
|
||||||
incrementRequestCount: () => void
|
|
||||||
incrementTokenCount: (tokens: number) => void
|
|
||||||
incrementTPMCount: (tokens: number) => void
|
|
||||||
showQuotaLimitToast: () => void
|
showQuotaLimitToast: () => void
|
||||||
showTokenLimitToast: (used: number) => void
|
showTokenLimitToast: (used: number) => void
|
||||||
showTPMLimitToast: () => void
|
showTPMLimitToast: () => void
|
||||||
} {
|
} {
|
||||||
const { dailyRequestLimit, dailyTokenLimit, tpmLimit } = config
|
const { dailyRequestLimit, dailyTokenLimit, tpmLimit } = config
|
||||||
|
|
||||||
const dict = useDictionary()
|
const dict = useDictionary()
|
||||||
|
|
||||||
// Check if user has their own API key configured (bypass limits)
|
|
||||||
const hasOwnApiKey = useCallback((): boolean => {
|
|
||||||
const provider = localStorage.getItem(STORAGE_KEYS.aiProvider)
|
|
||||||
const apiKey = localStorage.getItem(STORAGE_KEYS.aiApiKey)
|
|
||||||
return !!(provider && apiKey)
|
|
||||||
}, [])
|
|
||||||
|
|
||||||
// Generic helper: Parse count from localStorage with NaN guard
|
|
||||||
const parseStorageCount = (key: string): number => {
|
|
||||||
const count = parseInt(localStorage.getItem(key) || "0", 10)
|
|
||||||
return Number.isNaN(count) ? 0 : count
|
|
||||||
}
|
|
||||||
|
|
||||||
// Generic helper: Create quota checker factory
|
|
||||||
const createQuotaChecker = useCallback(
|
|
||||||
(
|
|
||||||
getTimeKey: () => string,
|
|
||||||
timeStorageKey: string,
|
|
||||||
countStorageKey: string,
|
|
||||||
limit: number,
|
|
||||||
) => {
|
|
||||||
return (): QuotaCheckResult => {
|
|
||||||
if (hasOwnApiKey())
|
|
||||||
return { allowed: true, remaining: -1, used: 0 }
|
|
||||||
if (limit <= 0) return { allowed: true, remaining: -1, used: 0 }
|
|
||||||
|
|
||||||
const currentTime = getTimeKey()
|
|
||||||
const storedTime = localStorage.getItem(timeStorageKey)
|
|
||||||
let count = parseStorageCount(countStorageKey)
|
|
||||||
|
|
||||||
if (storedTime !== currentTime) {
|
|
||||||
count = 0
|
|
||||||
localStorage.setItem(timeStorageKey, currentTime)
|
|
||||||
localStorage.setItem(countStorageKey, "0")
|
|
||||||
}
|
|
||||||
|
|
||||||
return {
|
|
||||||
allowed: count < limit,
|
|
||||||
remaining: limit - count,
|
|
||||||
used: count,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
},
|
|
||||||
[hasOwnApiKey],
|
|
||||||
)
|
|
||||||
|
|
||||||
// Generic helper: Create quota incrementer factory
|
|
||||||
const createQuotaIncrementer = useCallback(
|
|
||||||
(
|
|
||||||
getTimeKey: () => string,
|
|
||||||
timeStorageKey: string,
|
|
||||||
countStorageKey: string,
|
|
||||||
validateInput: boolean = false,
|
|
||||||
) => {
|
|
||||||
return (tokens: number = 1): void => {
|
|
||||||
if (validateInput && (!Number.isFinite(tokens) || tokens <= 0))
|
|
||||||
return
|
|
||||||
|
|
||||||
const currentTime = getTimeKey()
|
|
||||||
const storedTime = localStorage.getItem(timeStorageKey)
|
|
||||||
let count = parseStorageCount(countStorageKey)
|
|
||||||
|
|
||||||
if (storedTime !== currentTime) {
|
|
||||||
count = 0
|
|
||||||
localStorage.setItem(timeStorageKey, currentTime)
|
|
||||||
}
|
|
||||||
|
|
||||||
localStorage.setItem(countStorageKey, String(count + tokens))
|
|
||||||
}
|
|
||||||
},
|
|
||||||
[],
|
|
||||||
)
|
|
||||||
|
|
||||||
// Check daily request limit
|
|
||||||
const checkDailyLimit = useMemo(
|
|
||||||
() =>
|
|
||||||
createQuotaChecker(
|
|
||||||
() => new Date().toDateString(),
|
|
||||||
STORAGE_KEYS.requestDate,
|
|
||||||
STORAGE_KEYS.requestCount,
|
|
||||||
dailyRequestLimit,
|
|
||||||
),
|
|
||||||
[createQuotaChecker, dailyRequestLimit],
|
|
||||||
)
|
|
||||||
|
|
||||||
// Increment request count
|
|
||||||
const incrementRequestCount = useMemo(
|
|
||||||
() =>
|
|
||||||
createQuotaIncrementer(
|
|
||||||
() => new Date().toDateString(),
|
|
||||||
STORAGE_KEYS.requestDate,
|
|
||||||
STORAGE_KEYS.requestCount,
|
|
||||||
false,
|
|
||||||
),
|
|
||||||
[createQuotaIncrementer],
|
|
||||||
)
|
|
||||||
|
|
||||||
// Show quota limit toast (request-based)
|
// Show quota limit toast (request-based)
|
||||||
const showQuotaLimitToast = useCallback(() => {
|
const showQuotaLimitToast = useCallback(() => {
|
||||||
toast.custom(
|
toast.custom(
|
||||||
@@ -155,30 +39,6 @@ export function useQuotaManager(config: QuotaConfig): {
|
|||||||
)
|
)
|
||||||
}, [dailyRequestLimit])
|
}, [dailyRequestLimit])
|
||||||
|
|
||||||
// Check daily token limit
|
|
||||||
const checkTokenLimit = useMemo(
|
|
||||||
() =>
|
|
||||||
createQuotaChecker(
|
|
||||||
() => new Date().toDateString(),
|
|
||||||
STORAGE_KEYS.tokenDate,
|
|
||||||
STORAGE_KEYS.tokenCount,
|
|
||||||
dailyTokenLimit,
|
|
||||||
),
|
|
||||||
[createQuotaChecker, dailyTokenLimit],
|
|
||||||
)
|
|
||||||
|
|
||||||
// Increment token count
|
|
||||||
const incrementTokenCount = useMemo(
|
|
||||||
() =>
|
|
||||||
createQuotaIncrementer(
|
|
||||||
() => new Date().toDateString(),
|
|
||||||
STORAGE_KEYS.tokenDate,
|
|
||||||
STORAGE_KEYS.tokenCount,
|
|
||||||
true, // Validate input tokens
|
|
||||||
),
|
|
||||||
[createQuotaIncrementer],
|
|
||||||
)
|
|
||||||
|
|
||||||
// Show token limit toast
|
// Show token limit toast
|
||||||
const showTokenLimitToast = useCallback(
|
const showTokenLimitToast = useCallback(
|
||||||
(used: number) => {
|
(used: number) => {
|
||||||
@@ -197,30 +57,6 @@ export function useQuotaManager(config: QuotaConfig): {
|
|||||||
[dailyTokenLimit],
|
[dailyTokenLimit],
|
||||||
)
|
)
|
||||||
|
|
||||||
// Check TPM (tokens per minute) limit
|
|
||||||
const checkTPMLimit = useMemo(
|
|
||||||
() =>
|
|
||||||
createQuotaChecker(
|
|
||||||
() => Math.floor(Date.now() / 60000).toString(),
|
|
||||||
STORAGE_KEYS.tpmMinute,
|
|
||||||
STORAGE_KEYS.tpmCount,
|
|
||||||
tpmLimit,
|
|
||||||
),
|
|
||||||
[createQuotaChecker, tpmLimit],
|
|
||||||
)
|
|
||||||
|
|
||||||
// Increment TPM count
|
|
||||||
const incrementTPMCount = useMemo(
|
|
||||||
() =>
|
|
||||||
createQuotaIncrementer(
|
|
||||||
() => Math.floor(Date.now() / 60000).toString(),
|
|
||||||
STORAGE_KEYS.tpmMinute,
|
|
||||||
STORAGE_KEYS.tpmCount,
|
|
||||||
true, // Validate input tokens
|
|
||||||
),
|
|
||||||
[createQuotaIncrementer],
|
|
||||||
)
|
|
||||||
|
|
||||||
// Show TPM limit toast
|
// Show TPM limit toast
|
||||||
const showTPMLimitToast = useCallback(() => {
|
const showTPMLimitToast = useCallback(() => {
|
||||||
const limitDisplay =
|
const limitDisplay =
|
||||||
@@ -233,18 +69,6 @@ export function useQuotaManager(config: QuotaConfig): {
|
|||||||
}, [tpmLimit, dict])
|
}, [tpmLimit, dict])
|
||||||
|
|
||||||
return {
|
return {
|
||||||
// Check functions
|
|
||||||
hasOwnApiKey,
|
|
||||||
checkDailyLimit,
|
|
||||||
checkTokenLimit,
|
|
||||||
checkTPMLimit,
|
|
||||||
|
|
||||||
// Increment functions
|
|
||||||
incrementRequestCount,
|
|
||||||
incrementTokenCount,
|
|
||||||
incrementTPMCount,
|
|
||||||
|
|
||||||
// Toast functions
|
|
||||||
showQuotaLimitToast,
|
showQuotaLimitToast,
|
||||||
showTokenLimitToast,
|
showTokenLimitToast,
|
||||||
showTPMLimitToast,
|
showTPMLimitToast,
|
||||||
|
|||||||
991
package-lock.json
generated
991
package-lock.json
generated
File diff suppressed because it is too large
Load Diff
@@ -32,6 +32,7 @@
|
|||||||
"@ai-sdk/google": "^3.0.0",
|
"@ai-sdk/google": "^3.0.0",
|
||||||
"@ai-sdk/openai": "^3.0.0",
|
"@ai-sdk/openai": "^3.0.0",
|
||||||
"@ai-sdk/react": "^3.0.1",
|
"@ai-sdk/react": "^3.0.1",
|
||||||
|
"@aws-sdk/client-dynamodb": "^3.957.0",
|
||||||
"@aws-sdk/credential-providers": "^3.943.0",
|
"@aws-sdk/credential-providers": "^3.943.0",
|
||||||
"@formatjs/intl-localematcher": "^0.7.2",
|
"@formatjs/intl-localematcher": "^0.7.2",
|
||||||
"@langfuse/client": "^4.4.9",
|
"@langfuse/client": "^4.4.9",
|
||||||
|
|||||||
Reference in New Issue
Block a user