feat: add daily token limit with actual usage tracking (#171)

* feat: add daily token limit with actual usage tracking - Add DAILY_TOKEN_LIMIT env var for configurable daily token limit - Track actual tokens from Bedrock API response metadata (not estimates) - Server sends inputTokens + cachedInputTokens + outputTokens via messageMetadata - Client increments token count in onFinish callback with actual usage - Add NaN guards to prevent corrupted localStorage values - Add token limit toast notification with quota display - Remove client-side token estimation (was blocking legitimate requests) - Switch to js-tiktoken for client compatibility (pure JS, no WASM) * feat: add TPM (tokens per minute) rate limiting - Add 50k tokens/min client-side rate limit - Track tokens per minute with automatic minute rollover - Check TPM limit after daily limits pass - Show toast when rate limit reached - NaN guards for localStorage values * feat: make TPM limit configurable via TPM_LIMIT env var * chore: restore cache debug logs * fix: prevent race condition in TPM tracking checkTPMLimit was resetting TPM count to 0 when checking, which overwrote the count saved by incrementTPMCount. Now checkTPMLimit only reads and incrementTPMCount handles all writes. * chore: improve TPM limit error message clarity
2026-01-02 22:32:27 +08:00 · 2025-12-08 18:56:34 +09:00
parent 728dda5267
commit 622829b903
7 changed files with 285 additions and 66 deletions
--- a/components/chat-panel.tsx
+++ b/components/chat-panel.tsx
@@ -33,6 +33,10 @@ const STORAGE_SESSION_ID_KEY = "next-ai-draw-io-session-id"
 const STORAGE_DIAGRAM_XML_KEY = "next-ai-draw-io-diagram-xml"
 const STORAGE_REQUEST_COUNT_KEY = "next-ai-draw-io-request-count"
 const STORAGE_REQUEST_DATE_KEY = "next-ai-draw-io-request-date"
+const STORAGE_TOKEN_COUNT_KEY = "next-ai-draw-io-token-count"
+const STORAGE_TOKEN_DATE_KEY = "next-ai-draw-io-token-date"
+const STORAGE_TPM_COUNT_KEY = "next-ai-draw-io-tpm-count"
+const STORAGE_TPM_MINUTE_KEY = "next-ai-draw-io-tpm-minute"

 import { useDiagram } from "@/contexts/diagram-context"
 import { findCachedResponse } from "@/lib/cached-responses"
@@ -98,6 +102,8 @@ export default function ChatPanel({
    const [, setAccessCodeRequired] = useState(false)
    const [input, setInput] = useState("")
    const [dailyRequestLimit, setDailyRequestLimit] = useState(0)
+    const [dailyTokenLimit, setDailyTokenLimit] = useState(0)
+    const [tpmLimit, setTpmLimit] = useState(0)

    // Check config on mount
    useEffect(() => {
@@ -106,6 +112,8 @@ export default function ChatPanel({
            .then((data) => {
                setAccessCodeRequired(data.accessCodeRequired)
                setDailyRequestLimit(data.dailyRequestLimit || 0)
+                setDailyTokenLimit(data.dailyTokenLimit || 0)
+                setTpmLimit(data.tpmLimit || 0)
            })
            .catch(() => setAccessCodeRequired(false))
    }, [])
@@ -148,7 +156,7 @@ export default function ChatPanel({
        localStorage.setItem(STORAGE_REQUEST_COUNT_KEY, String(count + 1))
    }, [])

-    // Helper to show quota limit toast
+    // Helper to show quota limit toast (request-based)
    const showQuotaLimitToast = useCallback(() => {
        toast.custom(
            (t) => (
@@ -162,6 +170,136 @@ export default function ChatPanel({
        )
    }, [dailyRequestLimit])

+    // Helper to check daily token limit (checks if already over limit)
+    const checkTokenLimit = useCallback((): {
+        allowed: boolean
+        remaining: number
+        used: number
+    } => {
+        if (dailyTokenLimit <= 0)
+            return { allowed: true, remaining: -1, used: 0 }
+
+        const today = new Date().toDateString()
+        const storedDate = localStorage.getItem(STORAGE_TOKEN_DATE_KEY)
+        let count = parseInt(
+            localStorage.getItem(STORAGE_TOKEN_COUNT_KEY) || "0",
+            10,
+        )
+
+        // Guard against NaN (e.g., if "NaN" was stored)
+        if (Number.isNaN(count)) count = 0
+
+        if (storedDate !== today) {
+            count = 0
+            localStorage.setItem(STORAGE_TOKEN_DATE_KEY, today)
+            localStorage.setItem(STORAGE_TOKEN_COUNT_KEY, "0")
+        }
+
+        return {
+            allowed: count < dailyTokenLimit,
+            remaining: dailyTokenLimit - count,
+            used: count,
+        }
+    }, [dailyTokenLimit])
+
+    // Helper to increment token count
+    const incrementTokenCount = useCallback((tokens: number): void => {
+        // Guard against NaN tokens
+        if (!Number.isFinite(tokens) || tokens <= 0) return
+
+        let count = parseInt(
+            localStorage.getItem(STORAGE_TOKEN_COUNT_KEY) || "0",
+            10,
+        )
+        // Guard against NaN count
+        if (Number.isNaN(count)) count = 0
+
+        localStorage.setItem(STORAGE_TOKEN_COUNT_KEY, String(count + tokens))
+    }, [])
+
+    // Helper to show token limit toast
+    const showTokenLimitToast = useCallback(
+        (used: number) => {
+            toast.custom(
+                (t) => (
+                    <QuotaLimitToast
+                        type="token"
+                        used={used}
+                        limit={dailyTokenLimit}
+                        onDismiss={() => toast.dismiss(t)}
+                    />
+                ),
+                { duration: 15000 },
+            )
+        },
+        [dailyTokenLimit],
+    )
+
+    // Helper to check TPM (tokens per minute) limit
+    // Note: This only READS, doesn't write. incrementTPMCount handles writes.
+    const checkTPMLimit = useCallback((): {
+        allowed: boolean
+        remaining: number
+        used: number
+    } => {
+        if (tpmLimit <= 0) return { allowed: true, remaining: -1, used: 0 }
+
+        const currentMinute = Math.floor(Date.now() / 60000).toString()
+        const storedMinute = localStorage.getItem(STORAGE_TPM_MINUTE_KEY)
+        let count = parseInt(
+            localStorage.getItem(STORAGE_TPM_COUNT_KEY) || "0",
+            10,
+        )
+
+        // Guard against NaN
+        if (Number.isNaN(count)) count = 0
+
+        // If we're in a new minute, treat count as 0 (will be reset on next increment)
+        if (storedMinute !== currentMinute) {
+            count = 0
+        }
+
+        return {
+            allowed: count < tpmLimit,
+            remaining: tpmLimit - count,
+            used: count,
+        }
+    }, [tpmLimit])
+
+    // Helper to increment TPM count
+    const incrementTPMCount = useCallback((tokens: number): void => {
+        // Guard against NaN tokens
+        if (!Number.isFinite(tokens) || tokens <= 0) return
+
+        const currentMinute = Math.floor(Date.now() / 60000).toString()
+        const storedMinute = localStorage.getItem(STORAGE_TPM_MINUTE_KEY)
+        let count = parseInt(
+            localStorage.getItem(STORAGE_TPM_COUNT_KEY) || "0",
+            10,
+        )
+
+        // Guard against NaN
+        if (Number.isNaN(count)) count = 0
+
+        // Reset if we're in a new minute
+        if (storedMinute !== currentMinute) {
+            count = 0
+            localStorage.setItem(STORAGE_TPM_MINUTE_KEY, currentMinute)
+        }
+
+        localStorage.setItem(STORAGE_TPM_COUNT_KEY, String(count + tokens))
+    }, [])
+
+    // Helper to show TPM limit toast
+    const showTPMLimitToast = useCallback(() => {
+        const limitDisplay =
+            tpmLimit >= 1000 ? `${tpmLimit / 1000}k` : String(tpmLimit)
+        toast.error(
+            `Rate limit reached (${limitDisplay} tokens/min). Please wait 60 seconds before sending another request.`,
+            { duration: 8000 },
+        )
+    }, [tpmLimit])
+
    // Generate a unique session ID for Langfuse tracing (restore from localStorage if available)
    const [sessionId, setSessionId] = useState(() => {
        if (typeof window !== "undefined") {
@@ -341,6 +479,26 @@ Please retry with an adjusted search pattern or use display_diagram if retries a
                setShowSettingsDialog(true)
            }
        },
+        onFinish: ({ message }) => {
+            // Track actual token usage from server metadata
+            const metadata = message?.metadata as
+                | Record<string, unknown>
+                | undefined
+            if (metadata) {
+                // Use Number.isFinite to guard against NaN (typeof NaN === 'number' is true)
+                const inputTokens = Number.isFinite(metadata.inputTokens)
+                    ? (metadata.inputTokens as number)
+                    : 0
+                const outputTokens = Number.isFinite(metadata.outputTokens)
+                    ? (metadata.outputTokens as number)
+                    : 0
+                const actualTokens = inputTokens + outputTokens
+                if (actualTokens > 0) {
+                    incrementTokenCount(actualTokens)
+                    incrementTPMCount(actualTokens)
+                }
+            }
+        },
        // Auto-resubmit when all tool results are available (including errors)
        // This enables the model to retry when a tool returns an error
        sendAutomaticallyWhen: lastAssistantMessageIsCompleteWithToolCalls,
@@ -585,6 +743,20 @@ Please retry with an adjusted search pattern or use display_diagram if retries a
                    return
                }

+                // Check daily token limit (actual usage tracked after response)
+                const tokenLimitCheck = checkTokenLimit()
+                if (!tokenLimitCheck.allowed) {
+                    showTokenLimitToast(tokenLimitCheck.used)
+                    return
+                }
+
+                // Check TPM (tokens per minute) limit
+                const tpmCheck = checkTPMLimit()
+                if (!tpmCheck.allowed) {
+                    showTPMLimitToast()
+                    return
+                }
+
                const accessCode =
                    localStorage.getItem(STORAGE_ACCESS_CODE_KEY) || ""
                sendMessage(
@@ -601,6 +773,7 @@ Please retry with an adjusted search pattern or use display_diagram if retries a
                )

                incrementRequestCount()
+                // Token count is tracked in onFinish with actual server usage
                setInput("")
                setFiles([])
            } catch (error) {
@@ -679,6 +852,20 @@ Please retry with an adjusted search pattern or use display_diagram if retries a
            return
        }

+        // Check daily token limit (actual usage tracked after response)
+        const tokenLimitCheck = checkTokenLimit()
+        if (!tokenLimitCheck.allowed) {
+            showTokenLimitToast(tokenLimitCheck.used)
+            return
+        }
+
+        // Check TPM (tokens per minute) limit
+        const tpmCheck = checkTPMLimit()
+        if (!tpmCheck.allowed) {
+            showTPMLimitToast()
+            return
+        }
+
        // Now send the message after state is guaranteed to be updated
        const accessCode = localStorage.getItem(STORAGE_ACCESS_CODE_KEY) || ""
        sendMessage(
@@ -695,6 +882,7 @@ Please retry with an adjusted search pattern or use display_diagram if retries a
        )

        incrementRequestCount()
+        // Token count is tracked in onFinish with actual server usage
    }

    const handleEditMessage = async (messageIndex: number, newText: string) => {
@@ -750,6 +938,20 @@ Please retry with an adjusted search pattern or use display_diagram if retries a
            return
        }

+        // Check daily token limit (actual usage tracked after response)
+        const tokenLimitCheck = checkTokenLimit()
+        if (!tokenLimitCheck.allowed) {
+            showTokenLimitToast(tokenLimitCheck.used)
+            return
+        }
+
+        // Check TPM (tokens per minute) limit
+        const tpmCheck = checkTPMLimit()
+        if (!tpmCheck.allowed) {
+            showTPMLimitToast()
+            return
+        }
+
        // Now send the edited message after state is guaranteed to be updated
        const accessCode = localStorage.getItem(STORAGE_ACCESS_CODE_KEY) || ""
        sendMessage(
@@ -766,6 +968,7 @@ Please retry with an adjusted search pattern or use display_diagram if retries a
        )

        incrementRequestCount()
+        // Token count is tracked in onFinish with actual server usage
    }

    // Collapsed view (desktop only)