mirror of
https://github.com/DayuanJiang/next-ai-draw-io.git
synced 2026-01-02 22:32:27 +08:00
feat: add daily token limit with actual usage tracking (#171)
* feat: add daily token limit with actual usage tracking
  - Add DAILY_TOKEN_LIMIT env var for configurable daily token limit
  - Track actual tokens from Bedrock API response metadata (not estimates)
  - Server sends inputTokens + cachedInputTokens + outputTokens via messageMetadata
  - Client increments token count in onFinish callback with actual usage
  - Add NaN guards to prevent corrupted localStorage values
  - Add token limit toast notification with quota display
  - Remove client-side token estimation (was blocking legitimate requests)
  - Switch to js-tiktoken for client compatibility (pure JS, no WASM)
* feat: add TPM (tokens per minute) rate limiting
  - Add 50k tokens/min client-side rate limit
  - Track tokens per minute with automatic minute rollover
  - Check TPM limit after daily limits pass
  - Show toast when rate limit reached
  - NaN guards for localStorage values
* feat: make TPM limit configurable via TPM_LIMIT env var
* chore: restore cache debug logs
* fix: prevent race condition in TPM tracking
  checkTPMLimit was resetting TPM count to 0 when checking, which overwrote the count saved by incrementTPMCount. Now checkTPMLimit only reads and incrementTPMCount handles all writes.
* chore: improve TPM limit error message clarity
This commit is contained in:
@@ -33,6 +33,10 @@ const STORAGE_SESSION_ID_KEY = "next-ai-draw-io-session-id"
|
||||
const STORAGE_DIAGRAM_XML_KEY = "next-ai-draw-io-diagram-xml"
|
||||
const STORAGE_REQUEST_COUNT_KEY = "next-ai-draw-io-request-count"
|
||||
const STORAGE_REQUEST_DATE_KEY = "next-ai-draw-io-request-date"
|
||||
// localStorage key: running token total for the day recorded under STORAGE_TOKEN_DATE_KEY
const STORAGE_TOKEN_COUNT_KEY = "next-ai-draw-io-token-count"
|
||||
// localStorage key: `new Date().toDateString()` value of the day the token total belongs to
const STORAGE_TOKEN_DATE_KEY = "next-ai-draw-io-token-date"
|
||||
// localStorage key: tokens counted within the minute recorded under STORAGE_TPM_MINUTE_KEY
const STORAGE_TPM_COUNT_KEY = "next-ai-draw-io-tpm-count"
|
||||
// localStorage key: minute index (`Math.floor(Date.now() / 60000)`) the TPM count belongs to
const STORAGE_TPM_MINUTE_KEY = "next-ai-draw-io-tpm-minute"
|
||||
|
||||
import { useDiagram } from "@/contexts/diagram-context"
|
||||
import { findCachedResponse } from "@/lib/cached-responses"
|
||||
@@ -98,6 +102,8 @@ export default function ChatPanel({
|
||||
const [, setAccessCodeRequired] = useState(false)
|
||||
const [input, setInput] = useState("")
|
||||
const [dailyRequestLimit, setDailyRequestLimit] = useState(0)
|
||||
// Daily token budget, filled in from the config data loaded on mount.
// Stays at 0 when the config provides no value; a value <= 0 disables the daily token check.
const [dailyTokenLimit, setDailyTokenLimit] = useState(0)
|
||||
// Tokens-per-minute budget, filled in from the config data loaded on mount.
// Stays at 0 when the config provides no value; a value <= 0 disables the TPM check.
const [tpmLimit, setTpmLimit] = useState(0)
|
||||
|
||||
// Check config on mount
|
||||
useEffect(() => {
|
||||
@@ -106,6 +112,8 @@ export default function ChatPanel({
|
||||
.then((data) => {
|
||||
setAccessCodeRequired(data.accessCodeRequired)
|
||||
setDailyRequestLimit(data.dailyRequestLimit || 0)
|
||||
setDailyTokenLimit(data.dailyTokenLimit || 0)
|
||||
setTpmLimit(data.tpmLimit || 0)
|
||||
})
|
||||
.catch(() => setAccessCodeRequired(false))
|
||||
}, [])
|
||||
@@ -148,7 +156,7 @@ export default function ChatPanel({
|
||||
localStorage.setItem(STORAGE_REQUEST_COUNT_KEY, String(count + 1))
|
||||
}, [])
|
||||
|
||||
// Helper to show quota limit toast
|
||||
// Helper to show quota limit toast (request-based)
|
||||
const showQuotaLimitToast = useCallback(() => {
|
||||
toast.custom(
|
||||
(t) => (
|
||||
@@ -162,6 +170,136 @@ export default function ChatPanel({
|
||||
)
|
||||
}, [dailyRequestLimit])
|
||||
|
||||
// Helper to check the daily token limit.
// Reports whether the usage already recorded for today is still below
// dailyTokenLimit. It does not estimate the cost of the upcoming request.
// Returns `remaining: -1` when the limit is disabled (dailyTokenLimit <= 0).
// Side effect: when the stored date is not today, the stored date and total
// are reset in localStorage.
const checkTokenLimit = useCallback((): {
    allowed: boolean
    remaining: number
    used: number
} => {
    const limitDisabled = dailyTokenLimit <= 0
    if (limitDisabled) {
        return { allowed: true, remaining: -1, used: 0 }
    }

    const todayLabel = new Date().toDateString()
    const lastTrackedDay = localStorage.getItem(STORAGE_TOKEN_DATE_KEY)
    const rawTotal = localStorage.getItem(STORAGE_TOKEN_COUNT_KEY) || "0"
    const parsedTotal = parseInt(rawTotal, 10)

    // A corrupted entry (e.g. the literal string "NaN") counts as no usage
    let used = Number.isNaN(parsedTotal) ? 0 : parsedTotal

    // The stored total belongs to another day: start over and persist the reset
    if (lastTrackedDay !== todayLabel) {
        used = 0
        localStorage.setItem(STORAGE_TOKEN_DATE_KEY, todayLabel)
        localStorage.setItem(STORAGE_TOKEN_COUNT_KEY, "0")
    }

    return {
        allowed: used < dailyTokenLimit,
        remaining: dailyTokenLimit - used,
        used,
    }
}, [dailyTokenLimit])
|
||||
|
||||
// Helper to increment token count.
// Adds the token usage of a finished response to the running daily total kept
// in localStorage.
//
// tokens: number of tokens to add; non-finite, zero and negative values are
// ignored so the stored total can never be corrupted.
//
// Handles the day rollover itself (mirroring how incrementTPMCount handles the
// minute rollover): usage reported after midnight is booked under the new day
// instead of being added to the previous day's total and then discarded by the
// next limit check.
const incrementTokenCount = useCallback((tokens: number): void => {
    // Guard against NaN / Infinity / non-positive tokens
    if (!Number.isFinite(tokens) || tokens <= 0) return

    const today = new Date().toDateString()
    const storedDate = localStorage.getItem(STORAGE_TOKEN_DATE_KEY)
    let count = parseInt(
        localStorage.getItem(STORAGE_TOKEN_COUNT_KEY) || "0",
        10,
    )

    // Guard against NaN count (e.g., if "NaN" was stored)
    if (Number.isNaN(count)) count = 0

    // Stored total belongs to another day: start a fresh total for today
    if (storedDate !== today) {
        count = 0
        localStorage.setItem(STORAGE_TOKEN_DATE_KEY, today)
    }

    localStorage.setItem(STORAGE_TOKEN_COUNT_KEY, String(count + tokens))
}, [])
|
||||
|
||||
// Helper to show the token limit toast.
// Renders a QuotaLimitToast of type "token" with the given usage and the
// current dailyTokenLimit; the toast is shown with a 15000 ms duration and
// dismisses itself through its onDismiss handler.
const showTokenLimitToast = useCallback(
    (used: number) => {
        const toastOptions = { duration: 15000 }
        toast.custom(
            (toastId) => (
                <QuotaLimitToast
                    type="token"
                    used={used}
                    limit={dailyTokenLimit}
                    onDismiss={() => toast.dismiss(toastId)}
                />
            ),
            toastOptions,
        )
    },
    [dailyTokenLimit],
)
|
||||
|
||||
// Helper to check the TPM (tokens per minute) limit.
// Read-only by design: it never writes to localStorage, leaving every write
// (including the minute rollover) to incrementTPMCount.
// Returns `remaining: -1` when the limit is disabled (tpmLimit <= 0).
const checkTPMLimit = useCallback((): {
    allowed: boolean
    remaining: number
    used: number
} => {
    // A non-positive limit means rate limiting is switched off
    if (tpmLimit <= 0) {
        return { allowed: true, remaining: -1, used: 0 }
    }

    const minuteNow = Math.floor(Date.now() / 60000).toString()
    const trackedMinute = localStorage.getItem(STORAGE_TPM_MINUTE_KEY)
    const parsed = parseInt(
        localStorage.getItem(STORAGE_TPM_COUNT_KEY) || "0",
        10,
    )

    // Usage only counts when it is a valid number recorded for the current
    // minute; a stale minute is treated as 0 here and reset on the next increment
    const used =
        trackedMinute === minuteNow && !Number.isNaN(parsed) ? parsed : 0

    return {
        allowed: used < tpmLimit,
        remaining: tpmLimit - used,
        used,
    }
}, [tpmLimit])
|
||||
|
||||
// Helper to increment the TPM (tokens per minute) count.
// Adds `tokens` to the total stored for the current minute. When the stored
// minute differs from the current one, the total restarts from zero and the
// stored minute is updated. Non-finite, zero and negative values are ignored.
const incrementTPMCount = useCallback((tokens: number): void => {
    const isUsableAmount = Number.isFinite(tokens) && tokens > 0
    if (!isUsableAmount) return

    const minuteNow = Math.floor(Date.now() / 60000).toString()
    const sameMinute =
        localStorage.getItem(STORAGE_TPM_MINUTE_KEY) === minuteNow
    const parsed = parseInt(
        localStorage.getItem(STORAGE_TPM_COUNT_KEY) || "0",
        10,
    )

    // Carry the stored total over only if it is valid and from this minute
    const carriedOver = sameMinute && !Number.isNaN(parsed) ? parsed : 0

    // New minute: record it so later reads attribute the total correctly
    if (!sameMinute) {
        localStorage.setItem(STORAGE_TPM_MINUTE_KEY, minuteNow)
    }

    localStorage.setItem(STORAGE_TPM_COUNT_KEY, String(carriedOver + tokens))
}, [])
|
||||
|
||||
// Helper to show the TPM limit toast.
// Shows an error toast (8000 ms) naming the configured tokens-per-minute
// limit; limits of 1000 or more are abbreviated with a "k" suffix.
const showTPMLimitToast = useCallback(() => {
    let limitDisplay: string
    if (tpmLimit >= 1000) {
        limitDisplay = `${tpmLimit / 1000}k`
    } else {
        limitDisplay = String(tpmLimit)
    }

    const message = `Rate limit reached (${limitDisplay} tokens/min). Please wait 60 seconds before sending another request.`
    toast.error(message, { duration: 8000 })
}, [tpmLimit])
|
||||
|
||||
// Generate a unique session ID for Langfuse tracing (restore from localStorage if available)
|
||||
const [sessionId, setSessionId] = useState(() => {
|
||||
if (typeof window !== "undefined") {
|
||||
@@ -341,6 +479,26 @@ Please retry with an adjusted search pattern or use display_diagram if retries a
|
||||
setShowSettingsDialog(true)
|
||||
}
|
||||
},
|
||||
onFinish: ({ message }) => {
    // Record the actual token usage attached to the finished message's metadata
    const usage = message?.metadata as Record<string, unknown> | undefined
    if (!usage) return

    // Number.isFinite rejects NaN and non-numbers (typeof NaN === 'number' is true)
    const asTokenCount = (raw: unknown): number =>
        Number.isFinite(raw) ? (raw as number) : 0

    const actualTokens =
        asTokenCount(usage.inputTokens) + asTokenCount(usage.outputTokens)
    if (actualTokens <= 0) return

    // Feed both the daily total and the per-minute total
    incrementTokenCount(actualTokens)
    incrementTPMCount(actualTokens)
},
|
||||
// Auto-resubmit when all tool results are available (including errors)
|
||||
// This enables the model to retry when a tool returns an error
|
||||
sendAutomaticallyWhen: lastAssistantMessageIsCompleteWithToolCalls,
|
||||
@@ -585,6 +743,20 @@ Please retry with an adjusted search pattern or use display_diagram if retries a
|
||||
return
|
||||
}
|
||||
|
||||
// Check daily token limit (actual usage tracked after response)
|
||||
const tokenLimitCheck = checkTokenLimit()
|
||||
if (!tokenLimitCheck.allowed) {
|
||||
showTokenLimitToast(tokenLimitCheck.used)
|
||||
return
|
||||
}
|
||||
|
||||
// Check TPM (tokens per minute) limit
|
||||
const tpmCheck = checkTPMLimit()
|
||||
if (!tpmCheck.allowed) {
|
||||
showTPMLimitToast()
|
||||
return
|
||||
}
|
||||
|
||||
const accessCode =
|
||||
localStorage.getItem(STORAGE_ACCESS_CODE_KEY) || ""
|
||||
sendMessage(
|
||||
@@ -601,6 +773,7 @@ Please retry with an adjusted search pattern or use display_diagram if retries a
|
||||
)
|
||||
|
||||
incrementRequestCount()
|
||||
// Token count is tracked in onFinish with actual server usage
|
||||
setInput("")
|
||||
setFiles([])
|
||||
} catch (error) {
|
||||
@@ -679,6 +852,20 @@ Please retry with an adjusted search pattern or use display_diagram if retries a
|
||||
return
|
||||
}
|
||||
|
||||
// Check daily token limit (actual usage tracked after response)
|
||||
const tokenLimitCheck = checkTokenLimit()
|
||||
if (!tokenLimitCheck.allowed) {
|
||||
showTokenLimitToast(tokenLimitCheck.used)
|
||||
return
|
||||
}
|
||||
|
||||
// Check TPM (tokens per minute) limit
|
||||
const tpmCheck = checkTPMLimit()
|
||||
if (!tpmCheck.allowed) {
|
||||
showTPMLimitToast()
|
||||
return
|
||||
}
|
||||
|
||||
// Now send the message after state is guaranteed to be updated
|
||||
const accessCode = localStorage.getItem(STORAGE_ACCESS_CODE_KEY) || ""
|
||||
sendMessage(
|
||||
@@ -695,6 +882,7 @@ Please retry with an adjusted search pattern or use display_diagram if retries a
|
||||
)
|
||||
|
||||
incrementRequestCount()
|
||||
// Token count is tracked in onFinish with actual server usage
|
||||
}
|
||||
|
||||
const handleEditMessage = async (messageIndex: number, newText: string) => {
|
||||
@@ -750,6 +938,20 @@ Please retry with an adjusted search pattern or use display_diagram if retries a
|
||||
return
|
||||
}
|
||||
|
||||
// Check daily token limit (actual usage tracked after response)
|
||||
const tokenLimitCheck = checkTokenLimit()
|
||||
if (!tokenLimitCheck.allowed) {
|
||||
showTokenLimitToast(tokenLimitCheck.used)
|
||||
return
|
||||
}
|
||||
|
||||
// Check TPM (tokens per minute) limit
|
||||
const tpmCheck = checkTPMLimit()
|
||||
if (!tpmCheck.allowed) {
|
||||
showTPMLimitToast()
|
||||
return
|
||||
}
|
||||
|
||||
// Now send the edited message after state is guaranteed to be updated
|
||||
const accessCode = localStorage.getItem(STORAGE_ACCESS_CODE_KEY) || ""
|
||||
sendMessage(
|
||||
@@ -766,6 +968,7 @@ Please retry with an adjusted search pattern or use display_diagram if retries a
|
||||
)
|
||||
|
||||
incrementRequestCount()
|
||||
// Token count is tracked in onFinish with actual server usage
|
||||
}
|
||||
|
||||
// Collapsed view (desktop only)
|
||||
|
||||
Reference in New Issue
Block a user