feat: add daily token limit with actual usage tracking (#171)

* feat: add daily token limit with actual usage tracking

- Add DAILY_TOKEN_LIMIT env var for configurable daily token limit
- Track actual tokens from Bedrock API response metadata (not estimates)
- Server sends inputTokens + cachedInputTokens + outputTokens via messageMetadata
- Client increments token count in onFinish callback with actual usage
- Add NaN guards to prevent corrupted localStorage values
- Add token limit toast notification with quota display
- Remove client-side token estimation (was blocking legitimate requests)
- Switch to js-tiktoken for client compatibility (pure JS, no WASM)

* feat: add TPM (tokens per minute) rate limiting

- Add 50k tokens/min client-side rate limit
- Track tokens per minute with automatic minute rollover
- Check TPM limit after daily limits pass
- Show toast when rate limit reached
- NaN guards for localStorage values

* feat: make TPM limit configurable via TPM_LIMIT env var

* chore: restore cache debug logs

* fix: prevent race condition in TPM tracking

checkTPMLimit was resetting TPM count to 0 when checking, which
overwrote the count saved by incrementTPMCount. Now checkTPMLimit
only reads and incrementTPMCount handles all writes.

* chore: improve TPM limit error message clarity
This commit is contained in:
Dayuan Jiang
2025-12-08 18:56:34 +09:00
committed by GitHub
parent 728dda5267
commit 622829b903
7 changed files with 285 additions and 66 deletions

View File

@@ -33,6 +33,10 @@ const STORAGE_SESSION_ID_KEY = "next-ai-draw-io-session-id"
const STORAGE_DIAGRAM_XML_KEY = "next-ai-draw-io-diagram-xml"
const STORAGE_REQUEST_COUNT_KEY = "next-ai-draw-io-request-count"
const STORAGE_REQUEST_DATE_KEY = "next-ai-draw-io-request-date"
// localStorage key: running total of tokens counted toward the daily token limit
const STORAGE_TOKEN_COUNT_KEY = "next-ai-draw-io-token-count"
// localStorage key: date string (Date#toDateString) that the daily token total belongs to
const STORAGE_TOKEN_DATE_KEY = "next-ai-draw-io-token-date"
// localStorage key: tokens counted within the current one-minute window (TPM limit)
const STORAGE_TPM_COUNT_KEY = "next-ai-draw-io-tpm-count"
// localStorage key: minute index (Math.floor(Date.now() / 60000)) that the TPM count belongs to
const STORAGE_TPM_MINUTE_KEY = "next-ai-draw-io-tpm-minute"
import { useDiagram } from "@/contexts/diagram-context"
import { findCachedResponse } from "@/lib/cached-responses"
@@ -98,6 +102,8 @@ export default function ChatPanel({
const [, setAccessCodeRequired] = useState(false)
const [input, setInput] = useState("")
const [dailyRequestLimit, setDailyRequestLimit] = useState(0)
// Daily token quota taken from the config response; 0 (or less) disables the check
const [dailyTokenLimit, setDailyTokenLimit] = useState(0)
// Tokens-per-minute quota taken from the config response; 0 (or less) disables the check
const [tpmLimit, setTpmLimit] = useState(0)
// Check config on mount
useEffect(() => {
@@ -106,6 +112,8 @@ export default function ChatPanel({
.then((data) => {
setAccessCodeRequired(data.accessCodeRequired)
setDailyRequestLimit(data.dailyRequestLimit || 0)
setDailyTokenLimit(data.dailyTokenLimit || 0)
setTpmLimit(data.tpmLimit || 0)
})
.catch(() => setAccessCodeRequired(false))
}, [])
@@ -148,7 +156,7 @@ export default function ChatPanel({
localStorage.setItem(STORAGE_REQUEST_COUNT_KEY, String(count + 1))
}, [])
// Helper to show quota limit toast
// Helper to show quota limit toast (request-based)
const showQuotaLimitToast = useCallback(() => {
toast.custom(
(t) => (
@@ -162,6 +170,136 @@ export default function ChatPanel({
)
}, [dailyRequestLimit])
// Helper to check daily token limit (checks if already over limit).
//
// Returns:
//   allowed   - true when the stored usage is still below the daily limit
//   remaining - tokens left today, never negative; -1 means "no limit configured"
//   used      - tokens counted so far today
//
// Side effect: when the stored date is not today, the stored date and count
// are reset in localStorage so the new day starts from zero.
const checkTokenLimit = useCallback((): {
    allowed: boolean
    remaining: number
    used: number
} => {
    // A non-positive limit means the daily token quota is disabled.
    if (dailyTokenLimit <= 0)
        return { allowed: true, remaining: -1, used: 0 }

    const today = new Date().toDateString()
    const storedDate = localStorage.getItem(STORAGE_TOKEN_DATE_KEY)
    let count = parseInt(
        localStorage.getItem(STORAGE_TOKEN_COUNT_KEY) || "0",
        10,
    )
    // Guard against corrupted values (e.g., "NaN" or a negative number was stored)
    if (Number.isNaN(count) || count < 0) count = 0

    if (storedDate !== today) {
        count = 0
        localStorage.setItem(STORAGE_TOKEN_DATE_KEY, today)
        localStorage.setItem(STORAGE_TOKEN_COUNT_KEY, "0")
    }

    return {
        allowed: count < dailyTokenLimit,
        // Usage is recorded after the response, so it can overshoot the limit.
        // Clamp at 0 so an overshoot never yields a negative value that could
        // be mistaken for the -1 "no limit" sentinel.
        remaining: Math.max(0, dailyTokenLimit - count),
        used: count,
    }
}, [dailyTokenLimit])
// Helper to increment token count.
//
// Adds `tokens` to the daily total kept in localStorage. Non-finite or
// non-positive values are ignored so a bad usage report cannot corrupt the
// stored total.
//
// The day rollover is handled here as well as in the limit check: a response
// that finishes after midnight must start the new day's total instead of
// being added to yesterday's total (which the next limit check would discard).
const incrementTokenCount = useCallback((tokens: number): void => {
    // Guard against NaN tokens
    if (!Number.isFinite(tokens) || tokens <= 0) return

    const today = new Date().toDateString()
    const storedDate = localStorage.getItem(STORAGE_TOKEN_DATE_KEY)
    let count = parseInt(
        localStorage.getItem(STORAGE_TOKEN_COUNT_KEY) || "0",
        10,
    )
    // Guard against NaN count
    if (Number.isNaN(count)) count = 0

    // Reset if the stored total belongs to a previous day
    if (storedDate !== today) {
        count = 0
        localStorage.setItem(STORAGE_TOKEN_DATE_KEY, today)
    }

    localStorage.setItem(STORAGE_TOKEN_COUNT_KEY, String(count + tokens))
}, [])
// Shows the "daily token quota reached" toast.
// `used` is the number of tokens already counted today; the configured daily
// limit is read from component state.
const showTokenLimitToast = useCallback(
    (used: number) => {
        // Keep the toast visible for 15 seconds
        const toastOptions = { duration: 15000 }

        toast.custom(
            (toastId) => (
                <QuotaLimitToast
                    type="token"
                    limit={dailyTokenLimit}
                    used={used}
                    onDismiss={() => toast.dismiss(toastId)}
                />
            ),
            toastOptions,
        )
    },
    [dailyTokenLimit],
)
// Helper to check TPM (tokens per minute) limit
// Note: This only READS, doesn't write. incrementTPMCount handles writes.
//
// Returns:
//   allowed   - true when usage in the current minute is below the TPM limit
//   remaining - tokens left in the current minute, never negative;
//               -1 means "no limit configured"
//   used      - tokens counted so far in the current minute
const checkTPMLimit = useCallback((): {
    allowed: boolean
    remaining: number
    used: number
} => {
    // A non-positive limit means the TPM quota is disabled.
    if (tpmLimit <= 0) return { allowed: true, remaining: -1, used: 0 }

    const currentMinute = Math.floor(Date.now() / 60000).toString()
    const storedMinute = localStorage.getItem(STORAGE_TPM_MINUTE_KEY)
    let count = parseInt(
        localStorage.getItem(STORAGE_TPM_COUNT_KEY) || "0",
        10,
    )
    // Guard against corrupted values (e.g., "NaN" or a negative number was stored)
    if (Number.isNaN(count) || count < 0) count = 0

    // If we're in a new minute, treat count as 0 (will be reset on next increment)
    if (storedMinute !== currentMinute) {
        count = 0
    }

    return {
        allowed: count < tpmLimit,
        // Usage is recorded after the response, so it can overshoot the limit.
        // Clamp at 0 so an overshoot never yields a negative value that could
        // be mistaken for the -1 "no limit" sentinel.
        remaining: Math.max(0, tpmLimit - count),
        used: count,
    }
}, [tpmLimit])
// Adds `tokens` to the per-minute usage counter kept in localStorage.
// This is the only writer of the TPM keys: when the stored minute differs
// from the current one, the minute marker is updated and counting restarts
// from zero before the new tokens are added.
const incrementTPMCount = useCallback((tokens: number): void => {
    // Ignore NaN / Infinity / non-positive usage values
    const isUsable = Number.isFinite(tokens) && tokens > 0
    if (!isUsable) return

    const minuteNow = String(Math.floor(Date.now() / 60000))
    const minuteOnRecord = localStorage.getItem(STORAGE_TPM_MINUTE_KEY)
    const parsedCount = parseInt(
        localStorage.getItem(STORAGE_TPM_COUNT_KEY) || "0",
        10,
    )

    const sameMinute = minuteOnRecord === minuteNow
    if (!sameMinute) {
        // New minute window: record it so later checks compare against it
        localStorage.setItem(STORAGE_TPM_MINUTE_KEY, minuteNow)
    }

    // Start from zero on a new minute or when the stored count is not a number
    const baseCount = !sameMinute || Number.isNaN(parsedCount) ? 0 : parsedCount
    localStorage.setItem(STORAGE_TPM_COUNT_KEY, String(baseCount + tokens))
}, [])
// Shows an error toast telling the user the tokens-per-minute limit was hit.
// Limits of 1000 or more are displayed in thousands with a "k" suffix.
const showTPMLimitToast = useCallback(() => {
    let limitDisplay = String(tpmLimit)
    if (tpmLimit >= 1000) {
        limitDisplay = `${tpmLimit / 1000}k`
    }

    const message = `Rate limit reached (${limitDisplay} tokens/min). Please wait 60 seconds before sending another request.`
    toast.error(message, { duration: 8000 })
}, [tpmLimit])
// Generate a unique session ID for Langfuse tracing (restore from localStorage if available)
const [sessionId, setSessionId] = useState(() => {
if (typeof window !== "undefined") {
@@ -341,6 +479,26 @@ Please retry with an adjusted search pattern or use display_diagram if retries a
setShowSettingsDialog(true)
}
},
onFinish: ({ message }) => {
    // Record the token usage reported in the finished message's metadata
    const usage = message?.metadata as Record<string, unknown> | undefined
    if (!usage) return

    // Number.isFinite rejects NaN and non-number values; anything it rejects
    // counts as 0 (a plain typeof check would let NaN through)
    const toCount = (value: unknown): number =>
        Number.isFinite(value) ? (value as number) : 0

    const totalTokens =
        toCount(usage.inputTokens) + toCount(usage.outputTokens)
    if (totalTokens > 0) {
        // Feed both the daily quota and the per-minute rate counter
        incrementTokenCount(totalTokens)
        incrementTPMCount(totalTokens)
    }
},
// Auto-resubmit when all tool results are available (including errors)
// This enables the model to retry when a tool returns an error
sendAutomaticallyWhen: lastAssistantMessageIsCompleteWithToolCalls,
@@ -585,6 +743,20 @@ Please retry with an adjusted search pattern or use display_diagram if retries a
return
}
// Check daily token limit (actual usage tracked after response)
const tokenLimitCheck = checkTokenLimit()
if (!tokenLimitCheck.allowed) {
showTokenLimitToast(tokenLimitCheck.used)
return
}
// Check TPM (tokens per minute) limit
const tpmCheck = checkTPMLimit()
if (!tpmCheck.allowed) {
showTPMLimitToast()
return
}
const accessCode =
localStorage.getItem(STORAGE_ACCESS_CODE_KEY) || ""
sendMessage(
@@ -601,6 +773,7 @@ Please retry with an adjusted search pattern or use display_diagram if retries a
)
incrementRequestCount()
// Token count is tracked in onFinish with actual server usage
setInput("")
setFiles([])
} catch (error) {
@@ -679,6 +852,20 @@ Please retry with an adjusted search pattern or use display_diagram if retries a
return
}
// Check daily token limit (actual usage tracked after response)
const tokenLimitCheck = checkTokenLimit()
if (!tokenLimitCheck.allowed) {
showTokenLimitToast(tokenLimitCheck.used)
return
}
// Check TPM (tokens per minute) limit
const tpmCheck = checkTPMLimit()
if (!tpmCheck.allowed) {
showTPMLimitToast()
return
}
// Now send the message after state is guaranteed to be updated
const accessCode = localStorage.getItem(STORAGE_ACCESS_CODE_KEY) || ""
sendMessage(
@@ -695,6 +882,7 @@ Please retry with an adjusted search pattern or use display_diagram if retries a
)
incrementRequestCount()
// Token count is tracked in onFinish with actual server usage
}
const handleEditMessage = async (messageIndex: number, newText: string) => {
@@ -750,6 +938,20 @@ Please retry with an adjusted search pattern or use display_diagram if retries a
return
}
// Check daily token limit (actual usage tracked after response)
const tokenLimitCheck = checkTokenLimit()
if (!tokenLimitCheck.allowed) {
showTokenLimitToast(tokenLimitCheck.used)
return
}
// Check TPM (tokens per minute) limit
const tpmCheck = checkTPMLimit()
if (!tpmCheck.allowed) {
showTPMLimitToast()
return
}
// Now send the edited message after state is guaranteed to be updated
const accessCode = localStorage.getItem(STORAGE_ACCESS_CODE_KEY) || ""
sendMessage(
@@ -766,6 +968,7 @@ Please retry with an adjusted search pattern or use display_diagram if retries a
)
incrementRequestCount()
// Token count is tracked in onFinish with actual server usage
}
// Collapsed view (desktop only)