feat: add server-side quota tracking with DynamoDB (#379)

- Add dynamo-quota-manager.ts for atomic quota checks using ConditionExpression
- Enforce daily request limit, daily token limit, and tokens-per-minute (TPM) limit
- Return 429 with quota details (type, used, limit) when exceeded
- Quota is opt-in: only enabled when DYNAMODB_QUOTA_TABLE env var is set
- Remove client-side quota enforcement (server is now source of truth)
- Simplify use-quota-manager.tsx to only display toasts
- Add @aws-sdk/client-dynamodb dependency
This commit is contained in:
Dayuan Jiang
2025-12-23 18:36:27 +09:00
committed by GitHub
parent 5ec05eb100
commit 97ae9395cd
6 changed files with 1128 additions and 416 deletions

View File

@@ -556,6 +556,23 @@ Continue from EXACTLY where you stopped.`,
}
},
onError: (error) => {
// Handle server-side quota limit (429 response)
if (error.message.includes("Daily request limit")) {
quotaManager.showQuotaLimitToast()
return
}
if (error.message.includes("Daily token limit")) {
quotaManager.showTokenLimitToast(dailyTokenLimit)
return
}
if (
error.message.includes("Rate limit exceeded") ||
error.message.includes("tokens per minute")
) {
quotaManager.showTPMLimitToast()
return
}
// Silence access code error in console since it's handled by UI
if (!error.message.includes("Invalid or missing access code")) {
console.error("Chat error:", error)
@@ -632,16 +649,6 @@ Continue from EXACTLY where you stopped.`,
// DEBUG: Log finish reason to diagnose truncation
console.log("[onFinish] finishReason:", metadata?.finishReason)
// AI SDK 6 provides totalTokens directly
const totalTokens =
metadata && Number.isFinite(metadata.totalTokens)
? (metadata.totalTokens as number)
: 0
if (totalTokens > 0) {
quotaManager.incrementTokenCount(totalTokens)
quotaManager.incrementTPMCount(totalTokens)
}
},
sendAutomaticallyWhen: ({ messages }) => {
const isInContinuationMode = partialXmlRef.current.length > 0
@@ -686,25 +693,6 @@ Continue from EXACTLY where you stopped.`,
autoRetryCountRef.current++
}
// Check quota limits before auto-retry
const tokenLimitCheck = quotaManager.checkTokenLimit()
if (!tokenLimitCheck.allowed) {
quotaManager.showTokenLimitToast(tokenLimitCheck.used)
autoRetryCountRef.current = 0
continuationRetryCountRef.current = 0
partialXmlRef.current = ""
return false
}
const tpmCheck = quotaManager.checkTPMLimit()
if (!tpmCheck.allowed) {
quotaManager.showTPMLimitToast()
autoRetryCountRef.current = 0
continuationRetryCountRef.current = 0
partialXmlRef.current = ""
return false
}
return true
},
})
@@ -921,9 +909,6 @@ Continue from EXACTLY where you stopped.`,
xmlSnapshotsRef.current.set(messageIndex, chartXml)
saveXmlSnapshots()
// Check all quota limits
if (!checkAllQuotaLimits()) return
sendChatMessage(parts, chartXml, previousXml, sessionId)
// Token count is tracked in onFinish with actual server usage
@@ -1001,30 +986,7 @@ Continue from EXACTLY where you stopped.`,
saveXmlSnapshots()
}
// Gate a send on every quota: daily requests first, then daily tokens, then
// TPM. The checks run in that order and stop at the first one that is not
// allowed, after showing that quota's toast. Returns true only when all pass.
const checkAllQuotaLimits = (): boolean => {
    const quotaGates: Array<() => boolean> = [
        () => {
            const dailyResult = quotaManager.checkDailyLimit()
            if (dailyResult.allowed) return true
            quotaManager.showQuotaLimitToast()
            return false
        },
        () => {
            const tokenResult = quotaManager.checkTokenLimit()
            if (tokenResult.allowed) return true
            // Forward the `used` value reported by the check to the toast.
            quotaManager.showTokenLimitToast(tokenResult.used)
            return false
        },
        () => {
            const tpmResult = quotaManager.checkTPMLimit()
            if (tpmResult.allowed) return true
            quotaManager.showTPMLimitToast()
            return false
        },
    ]
    // `every` short-circuits, so later checks are skipped once one fails.
    return quotaGates.every((gate) => gate())
}
// Send chat message with headers and increment quota
// Send chat message with headers
const sendChatMessage = (
parts: any,
xml: string,
@@ -1074,7 +1036,6 @@ Continue from EXACTLY where you stopped.`,
},
},
)
quotaManager.incrementRequestCount()
}
// Process files and append content to user text (handles PDF, text, and optionally images)
@@ -1162,13 +1123,8 @@ Continue from EXACTLY where you stopped.`,
setMessages(newMessages)
})
// Check all quota limits
if (!checkAllQuotaLimits()) return
// Now send the message after state is guaranteed to be updated
sendChatMessage(userParts, savedXml, previousXml, sessionId)
// Token count is tracked in onFinish with actual server usage
}
const handleEditMessage = async (messageIndex: number, newText: string) => {
@@ -1210,12 +1166,8 @@ Continue from EXACTLY where you stopped.`,
setMessages(newMessages)
})
// Check all quota limits
if (!checkAllQuotaLimits()) return
// Now send the edited message after state is guaranteed to be updated
sendChatMessage(newParts, savedXml, previousXml, sessionId)
// Token count is tracked in onFinish with actual server usage
}
// Collapsed view (desktop only)