feat: add daily token limit with actual usage tracking (#171)

* feat: add daily token limit with actual usage tracking - Add DAILY_TOKEN_LIMIT env var for configurable daily token limit - Track actual tokens from Bedrock API response metadata (not estimates) - Server sends inputTokens + cachedInputTokens + outputTokens via messageMetadata - Client increments token count in onFinish callback with actual usage - Add NaN guards to prevent corrupted localStorage values - Add token limit toast notification with quota display - Remove client-side token estimation (was blocking legitimate requests) - Switch to js-tiktoken for client compatibility (pure JS, no WASM) * feat: add TPM (tokens per minute) rate limiting - Add 50k tokens/min client-side rate limit - Track tokens per minute with automatic minute rollover - Check TPM limit after daily limits pass - Show toast when rate limit reached - NaN guards for localStorage values * feat: make TPM limit configurable via TPM_LIMIT env var * chore: restore cache debug logs * fix: prevent race condition in TPM tracking checkTPMLimit was resetting TPM count to 0 when checking, which overwrote the count saved by incrementTPMCount. Now checkTPMLimit only reads and incrementTPMCount handles all writes. * chore: improve TPM limit error message clarity
2026-01-02 14:22:28 +08:00 · 2025-12-08 18:56:34 +09:00
parent 728dda5267
commit 622829b903
7 changed files with 285 additions and 66 deletions
--- a/app/api/chat/route.ts
+++ b/app/api/chat/route.ts
@@ -189,32 +189,11 @@ async function handleChatRequest(req: Request): Promise<Response> {
        const textPart = lastMessage.parts?.find((p: any) => p.type === "text")
        const filePart = lastMessage.parts?.find((p: any) => p.type === "file")
        console.log("[Cache DEBUG] textPart?.text:", textPart?.text)
        console.log("[Cache DEBUG] hasFilePart:", !!filePart)
        const cached = findCachedResponse(textPart?.text || "", !!filePart)
        console.log("[Cache DEBUG] cached found:", !!cached)
        if (cached) {
            console.log(
                "[Cache] Returning cached response for:",
                textPart?.text,
            )
            return createCachedStreamResponse(cached.xml)
        } else {
            console.log("[Cache DEBUG] No cache match - checking why...")
            console.log(
                "[Cache DEBUG] Exact promptText:",
                JSON.stringify(textPart?.text),
            )
        }
    } else {
        console.log("[Cache DEBUG] Skipping cache check - conditions not met")
        if (!isFirstMessage)
            console.log("[Cache DEBUG] Reason: not first message")
        if (!isEmptyDiagram)
            console.log("[Cache DEBUG] Reason: diagram not empty")
    }
    // === CACHE CHECK END ===
@@ -243,28 +222,6 @@ ${lastMessageText}
    // Convert UIMessages to ModelMessages and add system message
    const modelMessages = convertToModelMessages(messages)
    // Debug: log raw messages to see what's coming in
    console.log(
        "[DEBUG] Raw UI messages:",
        JSON.stringify(
            messages.map((m: any, i: number) => ({
                index: i,
                role: m.role,
                partsCount: m.parts?.length,
                parts: m.parts?.map((p: any) => ({
                    type: p.type,
                    toolName: p.toolName,
                    toolCallId: p.toolCallId,
                    state: p.state,
                    inputType: p.input ? typeof p.input : undefined,
                    input: p.input,
                })),
            })),
            null,
            2,
        ),
    )
    // Fix tool call inputs for Bedrock API (requires JSON objects, not strings)
    const fixedMessages = fixToolCallInputs(modelMessages)
@@ -383,14 +340,8 @@ ${lastMessageText}
            }
            return null
        },
-        onFinish: ({ text, usage, providerMetadata }) => {
+        onFinish: ({ text, usage }) => {
            console.log(
                "[Cache] Full providerMetadata:",
                JSON.stringify(providerMetadata, null, 2),
            )
            console.log("[Cache] Usage:", JSON.stringify(usage, null, 2))
            // Pass usage to Langfuse (Bedrock streaming doesn't auto-report tokens to telemetry)
            // AI SDK uses inputTokens/outputTokens, Langfuse expects promptTokens/completionTokens
            setTraceOutput(text, {
                promptTokens: usage?.inputTokens,
                completionTokens: usage?.outputTokens,
@@ -476,7 +427,28 @@ IMPORTANT: Keep edits concise:
        }),
    })
-    return result.toUIMessageStreamResponse()
+    return result.toUIMessageStreamResponse({
        messageMetadata: ({ part }) => {
            if (part.type === "finish") {
                const usage = (part as any).totalUsage
                if (!usage) {
                    console.warn(
                        "[messageMetadata] No usage data in finish part",
                    )
                    return undefined
                }
                // Total input = non-cached + cached (these are separate counts)
                // Note: cacheWriteInputTokens is not available on finish part
                const totalInputTokens =
                    (usage.inputTokens ?? 0) + (usage.cachedInputTokens ?? 0)
                return {
                    inputTokens: totalInputTokens,
                    outputTokens: usage.outputTokens ?? 0,
                }
            }
            return undefined
        },
    })
 }
 // Wrap handler with error handling
--- a/app/api/config/route.ts
+++ b/app/api/config/route.ts
@@ -9,5 +9,7 @@ export async function GET() {
    return NextResponse.json({
        accessCodeRequired: accessCodes.length > 0,
        dailyRequestLimit: parseInt(process.env.DAILY_REQUEST_LIMIT || "0", 10),
        dailyTokenLimit: parseInt(process.env.DAILY_TOKEN_LIMIT || "0", 10),
        tpmLimit: parseInt(process.env.TPM_LIMIT || "0", 10),
    })
 }
--- a/components/chat-panel.tsx
+++ b/components/chat-panel.tsx
@@ -33,6 +33,10 @@ const STORAGE_SESSION_ID_KEY = "next-ai-draw-io-session-id"
 const STORAGE_DIAGRAM_XML_KEY = "next-ai-draw-io-diagram-xml"
 const STORAGE_REQUEST_COUNT_KEY = "next-ai-draw-io-request-count"
 const STORAGE_REQUEST_DATE_KEY = "next-ai-draw-io-request-date"
 const STORAGE_TOKEN_COUNT_KEY = "next-ai-draw-io-token-count"
 const STORAGE_TOKEN_DATE_KEY = "next-ai-draw-io-token-date"
 const STORAGE_TPM_COUNT_KEY = "next-ai-draw-io-tpm-count"
 const STORAGE_TPM_MINUTE_KEY = "next-ai-draw-io-tpm-minute"
 import { useDiagram } from "@/contexts/diagram-context"
 import { findCachedResponse } from "@/lib/cached-responses"
@@ -98,6 +102,8 @@ export default function ChatPanel({
    const [, setAccessCodeRequired] = useState(false)
    const [input, setInput] = useState("")
    const [dailyRequestLimit, setDailyRequestLimit] = useState(0)
    const [dailyTokenLimit, setDailyTokenLimit] = useState(0)
    const [tpmLimit, setTpmLimit] = useState(0)
    // Check config on mount
    useEffect(() => {
@@ -106,6 +112,8 @@ export default function ChatPanel({
            .then((data) => {
                setAccessCodeRequired(data.accessCodeRequired)
                setDailyRequestLimit(data.dailyRequestLimit || 0)
                setDailyTokenLimit(data.dailyTokenLimit || 0)
                setTpmLimit(data.tpmLimit || 0)
            })
            .catch(() => setAccessCodeRequired(false))
    }, [])
@@ -148,7 +156,7 @@ export default function ChatPanel({
        localStorage.setItem(STORAGE_REQUEST_COUNT_KEY, String(count + 1))
    }, [])
-    // Helper to show quota limit toast
+    // Helper to show quota limit toast (request-based)
    const showQuotaLimitToast = useCallback(() => {
        toast.custom(
            (t) => (
@@ -162,6 +170,136 @@ export default function ChatPanel({
        )
    }, [dailyRequestLimit])
    /**
     * Reports whether the locally tracked daily token usage is still below
     * `dailyTokenLimit`.
     *
     * A non-positive limit disables the check (`remaining` is -1 in that
     * case). When the stored date is not today's, the stored date and count
     * are reset in localStorage and usage is reported as 0.
     *
     * @returns `allowed` (usage strictly below the limit), `remaining`
     *          (limit minus usage) and `used` (tokens counted today).
     */
    const checkTokenLimit = useCallback((): {
        allowed: boolean
        remaining: number
        used: number
    } => {
        if (dailyTokenLimit <= 0) {
            return { allowed: true, remaining: -1, used: 0 }
        }

        const todayLabel = new Date().toDateString()
        let usedToday = 0

        if (localStorage.getItem(STORAGE_TOKEN_DATE_KEY) === todayLabel) {
            const parsed = parseInt(
                localStorage.getItem(STORAGE_TOKEN_COUNT_KEY) || "0",
                10,
            )
            // A corrupted value (e.g. the literal string "NaN") counts as 0
            usedToday = Number.isNaN(parsed) ? 0 : parsed
        } else {
            // First check of a new day: start a fresh bucket
            localStorage.setItem(STORAGE_TOKEN_DATE_KEY, todayLabel)
            localStorage.setItem(STORAGE_TOKEN_COUNT_KEY, "0")
        }

        return {
            allowed: usedToday < dailyTokenLimit,
            remaining: dailyTokenLimit - usedToday,
            used: usedToday,
        }
    }, [dailyTokenLimit])
    /**
     * Adds `tokens` to the daily token total kept in localStorage.
     *
     * Non-finite or non-positive amounts are ignored so a bad value can
     * never corrupt the stored total. If the stored date is not today's,
     * the total restarts from zero and the stored date is updated first;
     * otherwise usage from a response that finishes after midnight would be
     * added to yesterday's total and then wiped by the next limit check.
     *
     * @param tokens - Number of tokens to add for the current day.
     */
    const incrementTokenCount = useCallback((tokens: number): void => {
        // Guard against NaN / Infinity / non-positive amounts
        if (!Number.isFinite(tokens) || tokens <= 0) return

        const today = new Date().toDateString()
        const storedDate = localStorage.getItem(STORAGE_TOKEN_DATE_KEY)
        let count = parseInt(
            localStorage.getItem(STORAGE_TOKEN_COUNT_KEY) || "0",
            10,
        )
        // Guard against a corrupted stored value
        if (Number.isNaN(count)) count = 0

        // Roll over to a new day, mirroring the minute rollover in incrementTPMCount
        if (storedDate !== today) {
            count = 0
            localStorage.setItem(STORAGE_TOKEN_DATE_KEY, today)
        }

        localStorage.setItem(STORAGE_TOKEN_COUNT_KEY, String(count + tokens))
    }, [])
    /**
     * Displays the daily-token-limit toast for 15 seconds.
     *
     * @param used - Token usage to show next to the configured limit.
     */
    const showTokenLimitToast = useCallback(
        (used: number) => {
            const toastOptions = { duration: 15000 }
            toast.custom(
                (toastId) => (
                    <QuotaLimitToast
                        type="token"
                        used={used}
                        limit={dailyTokenLimit}
                        onDismiss={() => toast.dismiss(toastId)}
                    />
                ),
                toastOptions,
            )
        },
        [dailyTokenLimit],
    )
    /**
     * Read-only check of the tokens-per-minute (TPM) budget.
     *
     * Nothing is written here; incrementTPMCount owns every write. A stored
     * count that belongs to a different minute is treated as 0. A
     * non-positive limit disables the check (`remaining` is -1 then).
     *
     * @returns `allowed` (usage strictly below the limit), `remaining`
     *          (limit minus usage) and `used` (tokens counted this minute).
     */
    const checkTPMLimit = useCallback((): {
        allowed: boolean
        remaining: number
        used: number
    } => {
        if (tpmLimit <= 0) {
            return { allowed: true, remaining: -1, used: 0 }
        }

        // Minutes since the epoch identify the current one-minute window
        const minuteBucket = Math.floor(Date.now() / 60000).toString()
        const inSameMinute =
            localStorage.getItem(STORAGE_TPM_MINUTE_KEY) === minuteBucket
        const parsed = parseInt(
            localStorage.getItem(STORAGE_TPM_COUNT_KEY) || "0",
            10,
        )
        // Stale windows and corrupted values both count as zero usage
        const usedThisMinute = inSameMinute && !Number.isNaN(parsed) ? parsed : 0

        return {
            allowed: usedThisMinute < tpmLimit,
            remaining: tpmLimit - usedThisMinute,
            used: usedThisMinute,
        }
    }, [tpmLimit])
    /**
     * Adds `tokens` to the current minute's TPM total in localStorage.
     *
     * Non-finite or non-positive amounts are ignored. When the stored
     * minute differs from the current one, the stored minute is updated and
     * the total restarts from zero before the new amount is added.
     *
     * @param tokens - Number of tokens to add for the current minute.
     */
    const incrementTPMCount = useCallback((tokens: number): void => {
        const isUsableAmount = Number.isFinite(tokens) && tokens > 0
        if (!isUsableAmount) return

        const minuteBucket = Math.floor(Date.now() / 60000).toString()
        let runningTotal = 0

        if (localStorage.getItem(STORAGE_TPM_MINUTE_KEY) === minuteBucket) {
            const parsed = parseInt(
                localStorage.getItem(STORAGE_TPM_COUNT_KEY) || "0",
                10,
            )
            // Keep 0 when the stored value is corrupted
            if (!Number.isNaN(parsed)) runningTotal = parsed
        } else {
            // New minute: record it and start counting from zero
            localStorage.setItem(STORAGE_TPM_MINUTE_KEY, minuteBucket)
        }

        localStorage.setItem(
            STORAGE_TPM_COUNT_KEY,
            String(runningTotal + tokens),
        )
    }, [])
    /**
     * Shows an error toast (8 seconds) telling the user the per-minute
     * token rate limit was hit. Limits of 1000 or more are displayed in
     * thousands with a "k" suffix.
     */
    const showTPMLimitToast = useCallback(() => {
        let limitDisplay = String(tpmLimit)
        if (tpmLimit >= 1000) {
            limitDisplay = `${tpmLimit / 1000}k`
        }
        const message = `Rate limit reached (${limitDisplay} tokens/min). Please wait 60 seconds before sending another request.`
        toast.error(message, { duration: 8000 })
    }, [tpmLimit])
    // Generate a unique session ID for Langfuse tracing (restore from localStorage if available)
    const [sessionId, setSessionId] = useState(() => {
        if (typeof window !== "undefined") {
@@ -341,6 +479,26 @@ Please retry with an adjusted search pattern or use display_diagram if retries a
                setShowSettingsDialog(true)
            }
        },
        onFinish: ({ message }) => {
            // Usage figures arrive as message metadata attached by the server
            const metadata = message?.metadata as
                | Record<string, unknown>
                | undefined
            if (!metadata) return

            // Number.isFinite rejects NaN, Infinity and every non-number value
            const toTokenCount = (value: unknown): number =>
                Number.isFinite(value) ? (value as number) : 0

            const actualTokens =
                toTokenCount(metadata.inputTokens) +
                toTokenCount(metadata.outputTokens)
            if (actualTokens <= 0) return

            // Charge the same amount to both the daily and per-minute counters
            incrementTokenCount(actualTokens)
            incrementTPMCount(actualTokens)
        },
        // Auto-resubmit when all tool results are available (including errors)
        // This enables the model to retry when a tool returns an error
        sendAutomaticallyWhen: lastAssistantMessageIsCompleteWithToolCalls,
@@ -585,6 +743,20 @@ Please retry with an adjusted search pattern or use display_diagram if retries a
                    return
                }
                // Check daily token limit (actual usage tracked after response)
                const tokenLimitCheck = checkTokenLimit()
                if (!tokenLimitCheck.allowed) {
                    showTokenLimitToast(tokenLimitCheck.used)
                    return
                }
                // Check TPM (tokens per minute) limit
                const tpmCheck = checkTPMLimit()
                if (!tpmCheck.allowed) {
                    showTPMLimitToast()
                    return
                }
                const accessCode =
                    localStorage.getItem(STORAGE_ACCESS_CODE_KEY) || ""
                sendMessage(
@@ -601,6 +773,7 @@ Please retry with an adjusted search pattern or use display_diagram if retries a
                )
                incrementRequestCount()
                // Token count is tracked in onFinish with actual server usage
                setInput("")
                setFiles([])
            } catch (error) {
@@ -679,6 +852,20 @@ Please retry with an adjusted search pattern or use display_diagram if retries a
            return
        }
        // Check daily token limit (actual usage tracked after response)
        const tokenLimitCheck = checkTokenLimit()
        if (!tokenLimitCheck.allowed) {
            showTokenLimitToast(tokenLimitCheck.used)
            return
        }
        // Check TPM (tokens per minute) limit
        const tpmCheck = checkTPMLimit()
        if (!tpmCheck.allowed) {
            showTPMLimitToast()
            return
        }
        // Now send the message after state is guaranteed to be updated
        const accessCode = localStorage.getItem(STORAGE_ACCESS_CODE_KEY) || ""
        sendMessage(
@@ -695,6 +882,7 @@ Please retry with an adjusted search pattern or use display_diagram if retries a
        )
        incrementRequestCount()
        // Token count is tracked in onFinish with actual server usage
    }
    const handleEditMessage = async (messageIndex: number, newText: string) => {
@@ -750,6 +938,20 @@ Please retry with an adjusted search pattern or use display_diagram if retries a
            return
        }
        // Check daily token limit (actual usage tracked after response)
        const tokenLimitCheck = checkTokenLimit()
        if (!tokenLimitCheck.allowed) {
            showTokenLimitToast(tokenLimitCheck.used)
            return
        }
        // Check TPM (tokens per minute) limit
        const tpmCheck = checkTPMLimit()
        if (!tpmCheck.allowed) {
            showTPMLimitToast()
            return
        }
        // Now send the edited message after state is guaranteed to be updated
        const accessCode = localStorage.getItem(STORAGE_ACCESS_CODE_KEY) || ""
        sendMessage(
@@ -766,6 +968,7 @@ Please retry with an adjusted search pattern or use display_diagram if retries a
        )
        incrementRequestCount()
        // Token count is tracked in onFinish with actual server usage
    }
    // Collapsed view (desktop only)
--- a/components/quota-limit-toast.tsx
+++ b/components/quota-limit-toast.tsx
@@ -5,16 +5,21 @@ import type React from "react"
 import { FaGithub } from "react-icons/fa"
 /** Props accepted by the QuotaLimitToast component. */
 interface QuotaLimitToastProps {
    /** Which limit was hit; the component defaults this to "request". "token" switches the title and counter to the token wording. */
    type?: "request" | "token"
    /** Amount consumed so far, shown as the left side of the used/limit badge. */
    used: number
    /** Configured limit, shown as the right side of the used/limit badge. */
    limit: number
    /** Callback supplied by the caller to close the toast (presumably invoked on dismiss/Escape; confirm in the component body). */
    onDismiss: () => void
 }
 export function QuotaLimitToast({
    type = "request",
    used,
    limit,
    onDismiss,
 }: QuotaLimitToastProps) {
    const isTokenLimit = type === "token"
    const formatNumber = (n: number) =>
        n >= 1000 ? `${(n / 1000).toFixed(1)}k` : n.toString()
    const handleKeyDown = (e: React.KeyboardEvent) => {
        if (e.key === "Escape") {
            e.preventDefault()
@@ -48,19 +53,24 @@ export function QuotaLimitToast({
                    />
                </div>
                <h3 className="font-semibold text-foreground text-sm">
-                    Daily Quota Reached
+                    {isTokenLimit
                        ? "Daily Token Limit Reached"
                        : "Daily Quota Reached"}
                </h3>
                <span className="px-2 py-0.5 text-xs font-medium rounded-md bg-muted text-muted-foreground">
-                    {used}/{limit}
+                    {isTokenLimit
                        ? `${formatNumber(used)}/${formatNumber(limit)} tokens`
                        : `${used}/${limit}`}
                </span>
            </div>
            {/* Message */}
            <div className="text-sm text-muted-foreground leading-relaxed mb-4 space-y-2">
                <p>
-                    Oops — you've reached the daily API limit for this demo! As
+                    Oops — you've reached the daily{" "}
-                    an indie developer covering all the API costs myself, I have
+                    {isTokenLimit ? "token" : "API"} limit for this demo! As an
-                    to set these limits to keep things sustainable.
+                    indie developer covering all the API costs myself, I have to
                    set these limits to keep things sustainable.
                </p>
                <p>
                    The good news is that you can self-host the project in
--- a/lib/token-counter.ts
+++ b/lib/token-counter.ts
@@ -1,21 +1,22 @@
 /**
- * Token counting utilities using Anthropic's tokenizer
+ * Token counting utilities using js-tiktoken
 *
- * This file is separate from system-prompts.ts because the @anthropic-ai/tokenizer
+ * Uses the gpt-4o encoding (o200k_base) as an approximation of Claude's tokenization.
- * package uses WebAssembly which doesn't work well with Next.js server-side rendering.
+ * This is a pure JavaScript implementation, no WASM required.
 * Import this file only in scripts or client-side code, not in API routes.
 */
-import { countTokens } from "@anthropic-ai/tokenizer"
+import { encodingForModel } from "js-tiktoken"
 import { DEFAULT_SYSTEM_PROMPT, EXTENDED_SYSTEM_PROMPT } from "./system-prompts"
 const encoder = encodingForModel("gpt-4o")
 /**
- * Count the number of tokens in a text string using Anthropic's tokenizer
+ * Count the number of tokens in a text string
 * @param text - The text to count tokens for
 * @returns The number of tokens
 */
 export function countTextTokens(text: string): number {
-    return countTokens(text)
+    return encoder.encode(text).length
 }
 /**
@@ -28,8 +29,8 @@ export function getSystemPromptTokenCounts(): {
    extended: number
    additions: number
 } {
-    const defaultTokens = countTokens(DEFAULT_SYSTEM_PROMPT)
+    const defaultTokens = countTextTokens(DEFAULT_SYSTEM_PROMPT)
-    const extendedTokens = countTokens(EXTENDED_SYSTEM_PROMPT)
+    const extendedTokens = countTextTokens(EXTENDED_SYSTEM_PROMPT)
    return {
        default: defaultTokens,
        extended: extendedTokens,
--- a/package-lock.json
+++ b/package-lock.json
@@ -37,6 +37,7 @@
                "base-64": "^1.0.0",
                "class-variance-authority": "^0.7.1",
                "clsx": "^2.1.1",
                "js-tiktoken": "^1.0.21",
                "jsdom": "^26.0.0",
                "lucide-react": "^0.483.0",
                "next": "^16.0.7",
@@ -6290,6 +6291,26 @@
            "integrity": "sha512-kwDPIFCGx0NZHog36dj+tHiwP4QMzsZ3AgMViUBKI0+V5n4U0ufTCUMhnQ04diaRI8EX/QcPfql7zlhZ7j4zgg==",
            "license": "MIT"
        },
        "node_modules/base64-js": {
            "version": "1.5.1",
            "resolved": "https://registry.npmjs.org/base64-js/-/base64-js-1.5.1.tgz",
            "integrity": "sha512-AKpaYlHn8t4SVbOHCy+b5+KKgvR4vrsD8vbvrbiQJps7fKDTkjkDry6ji0rUJjC0kzbNePLwzxq8iypo41qeWA==",
            "funding": [
                {
                    "type": "github",
                    "url": "https://github.com/sponsors/feross"
                },
                {
                    "type": "patreon",
                    "url": "https://www.patreon.com/feross"
                },
                {
                    "type": "consulting",
                    "url": "https://feross.org/support"
                }
            ],
            "license": "MIT"
        },
        "node_modules/baseline-browser-mapping": {
            "version": "2.8.31",
            "resolved": "https://registry.npmjs.org/baseline-browser-mapping/-/baseline-browser-mapping-2.8.31.tgz",
@@ -8851,6 +8872,15 @@
                "jiti": "lib/jiti-cli.mjs"
            }
        },
        "node_modules/js-tiktoken": {
            "version": "1.0.21",
            "resolved": "https://registry.npmjs.org/js-tiktoken/-/js-tiktoken-1.0.21.tgz",
            "integrity": "sha512-biOj/6M5qdgx5TKjDnFT1ymSpM5tbd3ylwDtrQvFQSu0Z7bBYko2dF+W/aUkXUPuk6IVpRxk/3Q2sHOzGlS36g==",
            "license": "MIT",
            "dependencies": {
                "base64-js": "^1.5.1"
            }
        },
        "node_modules/js-tokens": {
            "version": "4.0.0",
            "resolved": "https://registry.npmjs.org/js-tokens/-/js-tokens-4.0.0.tgz",
--- a/package.json
+++ b/package.json
@@ -41,6 +41,7 @@
        "base-64": "^1.0.0",
        "class-variance-authority": "^0.7.1",
        "clsx": "^2.1.1",
        "js-tiktoken": "^1.0.21",
        "jsdom": "^26.0.0",
        "lucide-react": "^0.483.0",
        "next": "^16.0.7",