diff --git a/app/api/chat/route.ts b/app/api/chat/route.ts
index dd2fbae..6df3b0f 100644
--- a/app/api/chat/route.ts
+++ b/app/api/chat/route.ts
@@ -189,32 +189,11 @@ async function handleChatRequest(req: Request): Promise<Response> {
             const textPart = lastMessage.parts?.find((p: any) => p.type === "text")
             const filePart = lastMessage.parts?.find((p: any) => p.type === "file")
 
-            console.log("[Cache DEBUG] textPart?.text:", textPart?.text)
-            console.log("[Cache DEBUG] hasFilePart:", !!filePart)
-
             const cached = findCachedResponse(textPart?.text || "", !!filePart)
-            console.log("[Cache DEBUG] cached found:", !!cached)
             if (cached) {
-                console.log(
-                    "[Cache] Returning cached response for:",
-                    textPart?.text,
-                )
                 return createCachedStreamResponse(cached.xml)
-            } else {
-                console.log("[Cache DEBUG] No cache match - checking why...")
-                console.log(
-                    "[Cache DEBUG] Exact promptText:",
-                    JSON.stringify(textPart?.text),
-                )
             }
-        } else {
-            console.log("[Cache DEBUG] Skipping cache check - conditions not met")
-            if (!isFirstMessage)
-                console.log("[Cache DEBUG] Reason: not first message")
-            if (!isEmptyDiagram)
-                console.log("[Cache DEBUG] Reason: diagram not empty")
         }
         // === CACHE CHECK END ===
 
@@ -243,28 +222,6 @@ ${lastMessageText}
     // Convert UIMessages to ModelMessages and add system message
     const modelMessages = convertToModelMessages(messages)
 
-    // Debug: log raw messages to see what's coming in
-    console.log(
-        "[DEBUG] Raw UI messages:",
-        JSON.stringify(
-            messages.map((m: any, i: number) => ({
-                index: i,
-                role: m.role,
-                partsCount: m.parts?.length,
-                parts: m.parts?.map((p: any) => ({
-                    type: p.type,
-                    toolName: p.toolName,
-                    toolCallId: p.toolCallId,
-                    state: p.state,
-                    inputType: p.input ? typeof p.input : undefined,
-                    input: p.input,
-                })),
-            })),
-            null,
-            2,
-        ),
-    )
-
     // Fix tool call inputs for Bedrock API (requires JSON objects, not strings)
     const fixedMessages = fixToolCallInputs(modelMessages)
 
@@ -383,14 +340,8 @@ ${lastMessageText}
             }
             return null
         },
-        onFinish: ({ text, usage, providerMetadata }) => {
-            console.log(
-                "[Cache] Full providerMetadata:",
-                JSON.stringify(providerMetadata, null, 2),
-            )
-            console.log("[Cache] Usage:", JSON.stringify(usage, null, 2))
+        onFinish: ({ text, usage }) => {
             // Pass usage to Langfuse (Bedrock streaming doesn't auto-report tokens to telemetry)
-            // AI SDK uses inputTokens/outputTokens, Langfuse expects promptTokens/completionTokens
             setTraceOutput(text, {
                 promptTokens: usage?.inputTokens,
                 completionTokens: usage?.outputTokens,
@@ -476,7 +427,28 @@ IMPORTANT: Keep edits concise:
         }),
     })
 
-    return result.toUIMessageStreamResponse()
+    return result.toUIMessageStreamResponse({
+        messageMetadata: ({ part }) => {
+            if (part.type === "finish") {
+                const usage = (part as any).totalUsage
+                if (!usage) {
+                    console.warn(
+                        "[messageMetadata] No usage data in finish part",
+                    )
+                    return undefined
+                }
+                // Total input = non-cached + cached (these are separate counts)
+                // Note: cacheWriteInputTokens is not available on finish part
+                const totalInputTokens =
+                    (usage.inputTokens ?? 0) + (usage.cachedInputTokens ?? 0)
+                return {
+                    inputTokens: totalInputTokens,
+                    outputTokens: usage.outputTokens ?? 0,
+                }
+            }
+            return undefined
+        },
+    })
 }
 
 // Wrap handler with error handling
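A note on the `messageMetadata` callback added above: the AI SDK calls it per stream part, and the object returned for the `finish` part is serialized onto the assistant message, surfacing client-side as `message.metadata` (consumed in `chat-panel.tsx` below). A minimal sketch of the implicit contract; the type and function names here are illustrative, not from the codebase:

```ts
// Shape of message.metadata produced by the route above (name is ours).
interface TokenUsageMetadata {
    inputTokens: number // non-cached + cached input tokens, summed server-side
    outputTokens: number
}

// Narrow unknown metadata before doing arithmetic on it. This mirrors the
// Number.isFinite guards in chat-panel.tsx: the metadata crosses a
// serialization boundary, so its shape should not be trusted blindly.
function readTokenUsage(metadata: unknown): TokenUsageMetadata {
    const m = (metadata ?? {}) as Partial<TokenUsageMetadata>
    return {
        inputTokens: Number.isFinite(m.inputTokens) ? (m.inputTokens as number) : 0,
        outputTokens: Number.isFinite(m.outputTokens) ? (m.outputTokens as number) : 0,
    }
}
```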
diff --git a/app/api/config/route.ts b/app/api/config/route.ts
index 5e4a959..2d60adc 100644
--- a/app/api/config/route.ts
+++ b/app/api/config/route.ts
@@ -9,5 +9,7 @@ export async function GET() {
     return NextResponse.json({
         accessCodeRequired: accessCodes.length > 0,
         dailyRequestLimit: parseInt(process.env.DAILY_REQUEST_LIMIT || "0", 10),
+        dailyTokenLimit: parseInt(process.env.DAILY_TOKEN_LIMIT || "0", 10),
+        tpmLimit: parseInt(process.env.TPM_LIMIT || "0", 10),
     })
 }
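Both new limits come from environment variables and fall back to `0`, which the client treats as "limit disabled". A sample `.env.local` under that assumption; the values are illustrative, not recommendations:

```bash
# Existing per-browser daily request cap
DAILY_REQUEST_LIMIT=50
# New: per-browser daily token cap (0 or unset disables)
DAILY_TOKEN_LIMIT=200000
# New: per-browser tokens-per-minute cap (0 or unset disables)
TPM_LIMIT=20000
```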
diff --git a/components/chat-panel.tsx b/components/chat-panel.tsx
index 462eb4a..ad1f618 100644
--- a/components/chat-panel.tsx
+++ b/components/chat-panel.tsx
@@ -33,6 +33,10 @@ const STORAGE_SESSION_ID_KEY = "next-ai-draw-io-session-id"
 const STORAGE_DIAGRAM_XML_KEY = "next-ai-draw-io-diagram-xml"
 const STORAGE_REQUEST_COUNT_KEY = "next-ai-draw-io-request-count"
 const STORAGE_REQUEST_DATE_KEY = "next-ai-draw-io-request-date"
+const STORAGE_TOKEN_COUNT_KEY = "next-ai-draw-io-token-count"
+const STORAGE_TOKEN_DATE_KEY = "next-ai-draw-io-token-date"
+const STORAGE_TPM_COUNT_KEY = "next-ai-draw-io-tpm-count"
+const STORAGE_TPM_MINUTE_KEY = "next-ai-draw-io-tpm-minute"
 
 import { useDiagram } from "@/contexts/diagram-context"
 import { findCachedResponse } from "@/lib/cached-responses"
@@ -98,6 +102,8 @@ export default function ChatPanel({
     const [, setAccessCodeRequired] = useState(false)
     const [input, setInput] = useState("")
     const [dailyRequestLimit, setDailyRequestLimit] = useState(0)
+    const [dailyTokenLimit, setDailyTokenLimit] = useState(0)
+    const [tpmLimit, setTpmLimit] = useState(0)
 
     // Check config on mount
     useEffect(() => {
@@ -106,6 +112,8 @@ export default function ChatPanel({
             .then((data) => {
                 setAccessCodeRequired(data.accessCodeRequired)
                 setDailyRequestLimit(data.dailyRequestLimit || 0)
+                setDailyTokenLimit(data.dailyTokenLimit || 0)
+                setTpmLimit(data.tpmLimit || 0)
             })
             .catch(() => setAccessCodeRequired(false))
     }, [])
@@ -148,7 +156,7 @@ export default function ChatPanel({
         localStorage.setItem(STORAGE_REQUEST_COUNT_KEY, String(count + 1))
     }, [])
 
-    // Helper to show quota limit toast
+    // Helper to show quota limit toast (request-based)
    const showQuotaLimitToast = useCallback(() => {
        toast.custom(
            (t) => (
@@ -162,6 +170,136 @@
         )
     }, [dailyRequestLimit])
 
+    // Helper to check daily token limit (checks if already over limit)
+    const checkTokenLimit = useCallback((): {
+        allowed: boolean
+        remaining: number
+        used: number
+    } => {
+        if (dailyTokenLimit <= 0)
+            return { allowed: true, remaining: -1, used: 0 }
+
+        const today = new Date().toDateString()
+        const storedDate = localStorage.getItem(STORAGE_TOKEN_DATE_KEY)
+        let count = parseInt(
+            localStorage.getItem(STORAGE_TOKEN_COUNT_KEY) || "0",
+            10,
+        )
+
+        // Guard against NaN (e.g., if "NaN" was stored)
+        if (Number.isNaN(count)) count = 0
+
+        if (storedDate !== today) {
+            count = 0
+            localStorage.setItem(STORAGE_TOKEN_DATE_KEY, today)
+            localStorage.setItem(STORAGE_TOKEN_COUNT_KEY, "0")
+        }
+
+        return {
+            allowed: count < dailyTokenLimit,
+            remaining: dailyTokenLimit - count,
+            used: count,
+        }
+    }, [dailyTokenLimit])
+
+    // Helper to increment token count
+    const incrementTokenCount = useCallback((tokens: number): void => {
+        // Guard against NaN tokens
+        if (!Number.isFinite(tokens) || tokens <= 0) return
+
+        let count = parseInt(
+            localStorage.getItem(STORAGE_TOKEN_COUNT_KEY) || "0",
+            10,
+        )
+        // Guard against NaN count
+        if (Number.isNaN(count)) count = 0
+
+        localStorage.setItem(STORAGE_TOKEN_COUNT_KEY, String(count + tokens))
+    }, [])
+
+    // Helper to show token limit toast
+    const showTokenLimitToast = useCallback(
+        (used: number) => {
+            toast.custom(
+                (t) => (
+                    <QuotaLimitToast
+                        type="token"
+                        used={used}
+                        limit={dailyTokenLimit}
+                        onDismiss={() => toast.dismiss(t)}
+                    />
+                ),
+                { duration: 15000 },
+            )
+        },
+        [dailyTokenLimit],
+    )
+
+    // Helper to check TPM (tokens per minute) limit
+    // Note: This only READS, doesn't write. incrementTPMCount handles writes.
+    const checkTPMLimit = useCallback((): {
+        allowed: boolean
+        remaining: number
+        used: number
+    } => {
+        if (tpmLimit <= 0) return { allowed: true, remaining: -1, used: 0 }
+
+        const currentMinute = Math.floor(Date.now() / 60000).toString()
+        const storedMinute = localStorage.getItem(STORAGE_TPM_MINUTE_KEY)
+        let count = parseInt(
+            localStorage.getItem(STORAGE_TPM_COUNT_KEY) || "0",
+            10,
+        )
+
+        // Guard against NaN
+        if (Number.isNaN(count)) count = 0
+
+        // If we're in a new minute, treat count as 0 (will be reset on next increment)
+        if (storedMinute !== currentMinute) {
+            count = 0
+        }
+
+        return {
+            allowed: count < tpmLimit,
+            remaining: tpmLimit - count,
+            used: count,
+        }
+    }, [tpmLimit])
+
+    // Helper to increment TPM count
+    const incrementTPMCount = useCallback((tokens: number): void => {
+        // Guard against NaN tokens
+        if (!Number.isFinite(tokens) || tokens <= 0) return
+
+        const currentMinute = Math.floor(Date.now() / 60000).toString()
+        const storedMinute = localStorage.getItem(STORAGE_TPM_MINUTE_KEY)
+        let count = parseInt(
+            localStorage.getItem(STORAGE_TPM_COUNT_KEY) || "0",
+            10,
+        )
+
+        // Guard against NaN
+        if (Number.isNaN(count)) count = 0
+
+        // Reset if we're in a new minute
+        if (storedMinute !== currentMinute) {
+            count = 0
+            localStorage.setItem(STORAGE_TPM_MINUTE_KEY, currentMinute)
+        }
+
+        localStorage.setItem(STORAGE_TPM_COUNT_KEY, String(count + tokens))
+    }, [])
+
+    // Helper to show TPM limit toast
+    const showTPMLimitToast = useCallback(() => {
+        const limitDisplay =
+            tpmLimit >= 1000 ? `${tpmLimit / 1000}k` : String(tpmLimit)
+        toast.error(
+            `Rate limit reached (${limitDisplay} tokens/min). Please wait 60 seconds before sending another request.`,
+            { duration: 8000 },
+        )
+    }, [tpmLimit])
+
     // Generate a unique session ID for Langfuse tracing (restore from localStorage if available)
     const [sessionId, setSessionId] = useState(() => {
         if (typeof window !== "undefined") {
@@ -341,6 +479,26 @@ Please retry with an adjusted search pattern or use display_diagram if retries a
                 setShowSettingsDialog(true)
             }
         },
+        onFinish: ({ message }) => {
+            // Track actual token usage from server metadata
+            const metadata = message?.metadata as
+                | Record<string, unknown>
+                | undefined
+            if (metadata) {
+                // Use Number.isFinite to guard against NaN (typeof NaN === 'number' is true)
+                const inputTokens = Number.isFinite(metadata.inputTokens)
+                    ? (metadata.inputTokens as number)
+                    : 0
+                const outputTokens = Number.isFinite(metadata.outputTokens)
+                    ? (metadata.outputTokens as number)
+                    : 0
+                const actualTokens = inputTokens + outputTokens
+                if (actualTokens > 0) {
+                    incrementTokenCount(actualTokens)
+                    incrementTPMCount(actualTokens)
+                }
+            }
+        },
         // Auto-resubmit when all tool results are available (including errors)
         // This enables the model to retry when a tool returns an error
         sendAutomaticallyWhen: lastAssistantMessageIsCompleteWithToolCalls,
@@ -585,6 +743,20 @@ Please retry with an adjusted search pattern or use display_diagram if retries a
             return
         }
 
+        // Check daily token limit (actual usage tracked after response)
+        const tokenLimitCheck = checkTokenLimit()
+        if (!tokenLimitCheck.allowed) {
+            showTokenLimitToast(tokenLimitCheck.used)
+            return
+        }
+
+        // Check TPM (tokens per minute) limit
+        const tpmCheck = checkTPMLimit()
+        if (!tpmCheck.allowed) {
+            showTPMLimitToast()
+            return
+        }
+
         const accessCode = localStorage.getItem(STORAGE_ACCESS_CODE_KEY) || ""
 
         sendMessage(
@@ -601,6 +773,7 @@ Please retry with an adjusted search pattern or use display_diagram if retries a
             )
 
             incrementRequestCount()
+            // Token count is tracked in onFinish with actual server usage
             setInput("")
             setFiles([])
         } catch (error) {
@@ -679,6 +852,20 @@ Please retry with an adjusted search pattern or use display_diagram if retries a
             return
         }
 
+        // Check daily token limit (actual usage tracked after response)
+        const tokenLimitCheck = checkTokenLimit()
+        if (!tokenLimitCheck.allowed) {
+            showTokenLimitToast(tokenLimitCheck.used)
+            return
+        }
+
+        // Check TPM (tokens per minute) limit
+        const tpmCheck = checkTPMLimit()
+        if (!tpmCheck.allowed) {
+            showTPMLimitToast()
+            return
+        }
+
         // Now send the message after state is guaranteed to be updated
         const accessCode = localStorage.getItem(STORAGE_ACCESS_CODE_KEY) || ""
         sendMessage(
@@ -695,6 +882,7 @@ Please retry with an adjusted search pattern or use display_diagram if retries a
         )
 
         incrementRequestCount()
+        // Token count is tracked in onFinish with actual server usage
     }
 
     const handleEditMessage = async (messageIndex: number, newText: string) => {
@@ -750,6 +938,20 @@ Please retry with an adjusted search pattern or use display_diagram if retries a
             return
         }
 
+        // Check daily token limit (actual usage tracked after response)
+        const tokenLimitCheck = checkTokenLimit()
+        if (!tokenLimitCheck.allowed) {
+            showTokenLimitToast(tokenLimitCheck.used)
+            return
+        }
+
+        // Check TPM (tokens per minute) limit
+        const tpmCheck = checkTPMLimit()
+        if (!tpmCheck.allowed) {
+            showTPMLimitToast()
+            return
+        }
+
         // Now send the edited message after state is guaranteed to be updated
         const accessCode = localStorage.getItem(STORAGE_ACCESS_CODE_KEY) || ""
         sendMessage(
@@ -766,6 +968,7 @@ Please retry with an adjusted search pattern or use display_diagram if retries a
         )
 
         incrementRequestCount()
+        // Token count is tracked in onFinish with actual server usage
     }
 
     // Collapsed view (desktop only)
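The TPM pair above is a fixed-window rate limiter keyed by the epoch minute: `Math.floor(Date.now() / 60000)` names the current window, and a stored count is only trusted while the stored window id still matches. The check is deliberately read-only; the write happens in `incrementTPMCount` once `onFinish` reports real usage, since the true token cost is unknown before the response completes. A standalone sketch of the same pattern with illustrative names (the real code inlines this in `chat-panel.tsx`, and all of it is per-browser and best-effort, since a user can clear localStorage):

```ts
// Fixed-window counter over localStorage. checkWindowLimit() is the read side
// (called before sending); addToWindow() is the write side (called once the
// actual cost is known). Keys and names here are illustrative.
const COUNT_KEY = "tpm-count"
const WINDOW_KEY = "tpm-window"

function currentWindow(): string {
    return Math.floor(Date.now() / 60000).toString() // epoch-minute id
}

function storedCount(): number {
    const raw = parseInt(localStorage.getItem(COUNT_KEY) || "0", 10)
    return Number.isNaN(raw) ? 0 : raw
}

function checkWindowLimit(limit: number): boolean {
    if (limit <= 0) return true // 0 means the limit is disabled
    // A stale window id means the stored count belongs to a past minute.
    const count =
        localStorage.getItem(WINDOW_KEY) === currentWindow() ? storedCount() : 0
    return count < limit
}

function addToWindow(tokens: number): void {
    if (!Number.isFinite(tokens) || tokens <= 0) return
    const windowId = currentWindow()
    let count = storedCount()
    if (localStorage.getItem(WINDOW_KEY) !== windowId) {
        count = 0 // new minute: reset the counter before adding
        localStorage.setItem(WINDOW_KEY, windowId)
    }
    localStorage.setItem(COUNT_KEY, String(count + tokens))
}
```

The daily counters work the same way, with `new Date().toDateString()` as the window id.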
diff --git a/components/quota-limit-toast.tsx b/components/quota-limit-toast.tsx
index 936e6ef..a8a6988 100644
--- a/components/quota-limit-toast.tsx
+++ b/components/quota-limit-toast.tsx
@@ -5,16 +5,21 @@ import type React from "react"
 import { FaGithub } from "react-icons/fa"
 
 interface QuotaLimitToastProps {
+    type?: "request" | "token"
     used: number
     limit: number
     onDismiss: () => void
 }
 
 export function QuotaLimitToast({
+    type = "request",
     used,
     limit,
     onDismiss,
 }: QuotaLimitToastProps) {
+    const isTokenLimit = type === "token"
+    const formatNumber = (n: number) =>
+        n >= 1000 ? `${(n / 1000).toFixed(1)}k` : n.toString()
     const handleKeyDown = (e: React.KeyboardEvent) => {
         if (e.key === "Escape") {
             e.preventDefault()
             onDismiss()
         }
     }
@@ -48,19 +53,24 @@ export function QuotaLimitToast({
                 />
 
-                    Daily Quota Reached
+                    {isTokenLimit
+                        ? "Daily Token Limit Reached"
+                        : "Daily Quota Reached"}
 
-                    {used}/{limit}
+                    {isTokenLimit
+                        ? `${formatNumber(used)}/${formatNumber(limit)} tokens`
+                        : `${used}/${limit}`}
 
             {/* Message */}
 
-                    Oops — you've reached the daily API limit for this demo! As
-                    an indie developer covering all the API costs myself, I have
-                    to set these limits to keep things sustainable.
+                    Oops — you've reached the daily{" "}
+                    {isTokenLimit ? "token" : "API"} limit for this demo! As an
+                    indie developer covering all the API costs myself, I have to
+                    set these limits to keep things sustainable.
 
                     The good news is that you can self-host the project in
diff --git a/lib/token-counter.ts b/lib/token-counter.ts
index 6531228..1f9b006 100644
--- a/lib/token-counter.ts
+++ b/lib/token-counter.ts
@@ -1,21 +1,22 @@
 /**
- * Token counting utilities using Anthropic's tokenizer
+ * Token counting utilities using js-tiktoken
  *
- * This file is separate from system-prompts.ts because the @anthropic-ai/tokenizer
- * package uses WebAssembly which doesn't work well with Next.js server-side rendering.
- * Import this file only in scripts or client-side code, not in API routes.
+ * Uses the o200k_base encoding (via "gpt-4o"), close to Claude's tokenization.
+ * This is a pure JavaScript implementation, no WASM required.
  */
 
-import { countTokens } from "@anthropic-ai/tokenizer"
+import { encodingForModel } from "js-tiktoken"
 import { DEFAULT_SYSTEM_PROMPT, EXTENDED_SYSTEM_PROMPT } from "./system-prompts"
 
+const encoder = encodingForModel("gpt-4o")
+
 /**
- * Count the number of tokens in a text string using Anthropic's tokenizer
+ * Count the number of tokens in a text string
  * @param text - The text to count tokens for
  * @returns The number of tokens
  */
 export function countTextTokens(text: string): number {
-    return countTokens(text)
+    return encoder.encode(text).length
 }
 
 /**
@@ -28,8 +29,8 @@ export function getSystemPromptTokenCounts(): {
     extended: number
     additions: number
 } {
-    const defaultTokens = countTokens(DEFAULT_SYSTEM_PROMPT)
-    const extendedTokens = countTokens(EXTENDED_SYSTEM_PROMPT)
+    const defaultTokens = countTextTokens(DEFAULT_SYSTEM_PROMPT)
+    const extendedTokens = countTextTokens(EXTENDED_SYSTEM_PROMPT)
     return {
         default: defaultTokens,
         extended: extendedTokens,
diff --git a/package-lock.json b/package-lock.json
index 7e0fea0..162c312 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -37,6 +37,7 @@
                 "base-64": "^1.0.0",
                 "class-variance-authority": "^0.7.1",
                 "clsx": "^2.1.1",
+                "js-tiktoken": "^1.0.21",
                 "jsdom": "^26.0.0",
                 "lucide-react": "^0.483.0",
                 "next": "^16.0.7",
@@ -6290,6 +6291,26 @@
             "integrity": "sha512-kwDPIFCGx0NZHog36dj+tHiwP4QMzsZ3AgMViUBKI0+V5n4U0ufTCUMhnQ04diaRI8EX/QcPfql7zlhZ7j4zgg==",
             "license": "MIT"
         },
+        "node_modules/base64-js": {
+            "version": "1.5.1",
+            "resolved": "https://registry.npmjs.org/base64-js/-/base64-js-1.5.1.tgz",
+            "integrity": "sha512-AKpaYlHn8t4SVbOHCy+b5+KKgvR4vrsD8vbvrbiQJps7fKDTkjkDry6ji0rUJjC0kzbNePLwzxq8iypo41qeWA==",
+            "funding": [
+                {
+                    "type": "github",
+                    "url": "https://github.com/sponsors/feross"
+                },
+                {
+                    "type": "patreon",
+                    "url": "https://www.patreon.com/feross"
+                },
+                {
+                    "type": "consulting",
+                    "url": "https://feross.org/support"
+                }
+            ],
+            "license": "MIT"
+        },
         "node_modules/baseline-browser-mapping": {
             "version": "2.8.31",
             "resolved": "https://registry.npmjs.org/baseline-browser-mapping/-/baseline-browser-mapping-2.8.31.tgz",
@@ -8851,6 +8872,15 @@
                 "jiti": "lib/jiti-cli.mjs"
             }
         },
+        "node_modules/js-tiktoken": {
+            "version": "1.0.21",
+            "resolved": "https://registry.npmjs.org/js-tiktoken/-/js-tiktoken-1.0.21.tgz",
+            "integrity": "sha512-biOj/6M5qdgx5TKjDnFT1ymSpM5tbd3ylwDtrQvFQSu0Z7bBYko2dF+W/aUkXUPuk6IVpRxk/3Q2sHOzGlS36g==",
+            "license": "MIT",
+            "dependencies": {
+                "base64-js": "^1.5.1"
+            }
+        },
         "node_modules/js-tokens": {
             "version": "4.0.0",
             "resolved": "https://registry.npmjs.org/js-tokens/-/js-tokens-4.0.0.tgz",
diff --git a/package.json b/package.json
index 8bfe210..0e479ab 100644
--- a/package.json
+++ b/package.json
@@ -41,6 +41,7 @@
         "base-64": "^1.0.0",
         "class-variance-authority": "^0.7.1",
         "clsx": "^2.1.1",
+        "js-tiktoken": "^1.0.21",
         "jsdom": "^26.0.0",
         "lucide-react": "^0.483.0",
         "next": "^16.0.7",