diff --git a/app/api/chat/route.ts b/app/api/chat/route.ts
index dd2fbae..6df3b0f 100644
--- a/app/api/chat/route.ts
+++ b/app/api/chat/route.ts
@@ -189,32 +189,11 @@ async function handleChatRequest(req: Request): Promise<Response> {
             const textPart = lastMessage.parts?.find((p: any) => p.type === "text")
             const filePart = lastMessage.parts?.find((p: any) => p.type === "file")
 
-            console.log("[Cache DEBUG] textPart?.text:", textPart?.text)
-            console.log("[Cache DEBUG] hasFilePart:", !!filePart)
-
             const cached = findCachedResponse(textPart?.text || "", !!filePart)
-            console.log("[Cache DEBUG] cached found:", !!cached)
             if (cached) {
-                console.log(
-                    "[Cache] Returning cached response for:",
-                    textPart?.text,
-                )
                 return createCachedStreamResponse(cached.xml)
-            } else {
-                console.log("[Cache DEBUG] No cache match - checking why...")
-                console.log(
-                    "[Cache DEBUG] Exact promptText:",
-                    JSON.stringify(textPart?.text),
-                )
             }
-        } else {
-            console.log("[Cache DEBUG] Skipping cache check - conditions not met")
-            if (!isFirstMessage)
-                console.log("[Cache DEBUG] Reason: not first message")
-            if (!isEmptyDiagram)
-                console.log("[Cache DEBUG] Reason: diagram not empty")
         }
         // === CACHE CHECK END ===
 
@@ -243,28 +222,6 @@ ${lastMessageText}
     // Convert UIMessages to ModelMessages and add system message
     const modelMessages = convertToModelMessages(messages)
 
-    // Debug: log raw messages to see what's coming in
-    console.log(
-        "[DEBUG] Raw UI messages:",
-        JSON.stringify(
-            messages.map((m: any, i: number) => ({
-                index: i,
-                role: m.role,
-                partsCount: m.parts?.length,
-                parts: m.parts?.map((p: any) => ({
-                    type: p.type,
-                    toolName: p.toolName,
-                    toolCallId: p.toolCallId,
-                    state: p.state,
-                    inputType: p.input ? typeof p.input : undefined,
-                    input: p.input,
-                })),
-            })),
-            null,
-            2,
-        ),
-    )
-
     // Fix tool call inputs for Bedrock API (requires JSON objects, not strings)
     const fixedMessages = fixToolCallInputs(modelMessages)
 
@@ -383,14 +340,8 @@ ${lastMessageText}
             }
             return null
         },
-        onFinish: ({ text, usage, providerMetadata }) => {
-            console.log(
-                "[Cache] Full providerMetadata:",
-                JSON.stringify(providerMetadata, null, 2),
-            )
-            console.log("[Cache] Usage:", JSON.stringify(usage, null, 2))
+        onFinish: ({ text, usage }) => {
             // Pass usage to Langfuse (Bedrock streaming doesn't auto-report tokens to telemetry)
-            // AI SDK uses inputTokens/outputTokens, Langfuse expects promptTokens/completionTokens
             setTraceOutput(text, {
                 promptTokens: usage?.inputTokens,
                 completionTokens: usage?.outputTokens,
@@ -476,7 +427,28 @@ IMPORTANT: Keep edits concise:
         }),
     })
 
-    return result.toUIMessageStreamResponse()
+    return result.toUIMessageStreamResponse({
+        messageMetadata: ({ part }) => {
+            if (part.type === "finish") {
+                const usage = (part as any).totalUsage
+                if (!usage) {
+                    console.warn(
+                        "[messageMetadata] No usage data in finish part",
+                    )
+                    return undefined
+                }
+                // Total input = non-cached + cached (these are separate counts)
+                // Note: cacheWriteInputTokens is not available on finish part
+                const totalInputTokens =
+                    (usage.inputTokens ?? 0) + (usage.cachedInputTokens ?? 0)
+                return {
+                    inputTokens: totalInputTokens,
+                    outputTokens: usage.outputTokens ?? 0,
+                }
+            }
+            return undefined
+        },
+    })
 }
 
 // Wrap handler with error handling
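A note on the `messageMetadata` callback added above: the AI SDK calls it per stream part, and the object returned for the `finish` part is serialized onto the assistant message, surfacing client-side as `message.metadata` (consumed in `chat-panel.tsx` below). A minimal sketch of the implicit contract; the type and function names here are illustrative, not from the codebase:

```ts
// Shape of message.metadata produced by the route above (name is ours).
interface TokenUsageMetadata {
    inputTokens: number // non-cached + cached input tokens, summed server-side
    outputTokens: number
}

// Narrow unknown metadata before doing arithmetic on it. This mirrors the
// Number.isFinite guards in chat-panel.tsx: the metadata crosses a
// serialization boundary, so its shape should not be trusted blindly.
function readTokenUsage(metadata: unknown): TokenUsageMetadata {
    const m = (metadata ?? {}) as Partial<TokenUsageMetadata>
    return {
        inputTokens: Number.isFinite(m.inputTokens) ? (m.inputTokens as number) : 0,
        outputTokens: Number.isFinite(m.outputTokens) ? (m.outputTokens as number) : 0,
    }
}
```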
diff --git a/app/api/config/route.ts b/app/api/config/route.ts
index 5e4a959..2d60adc 100644
--- a/app/api/config/route.ts
+++ b/app/api/config/route.ts
@@ -9,5 +9,7 @@ export async function GET() {
     return NextResponse.json({
         accessCodeRequired: accessCodes.length > 0,
         dailyRequestLimit: parseInt(process.env.DAILY_REQUEST_LIMIT || "0", 10),
+        dailyTokenLimit: parseInt(process.env.DAILY_TOKEN_LIMIT || "0", 10),
+        tpmLimit: parseInt(process.env.TPM_LIMIT || "0", 10),
     })
 }
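Both new limits come from environment variables and fall back to `0`, which the client treats as "limit disabled". A sample `.env.local` under that assumption; the values are illustrative, not recommendations:

```bash
# Existing per-browser daily request cap
DAILY_REQUEST_LIMIT=50
# New: per-browser daily token cap (0 or unset disables)
DAILY_TOKEN_LIMIT=200000
# New: per-browser tokens-per-minute cap (0 or unset disables)
TPM_LIMIT=20000
```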
diff --git a/components/chat-panel.tsx b/components/chat-panel.tsx
index 462eb4a..ad1f618 100644
--- a/components/chat-panel.tsx
+++ b/components/chat-panel.tsx
@@ -33,6 +33,10 @@ const STORAGE_SESSION_ID_KEY = "next-ai-draw-io-session-id"
 const STORAGE_DIAGRAM_XML_KEY = "next-ai-draw-io-diagram-xml"
 const STORAGE_REQUEST_COUNT_KEY = "next-ai-draw-io-request-count"
 const STORAGE_REQUEST_DATE_KEY = "next-ai-draw-io-request-date"
+const STORAGE_TOKEN_COUNT_KEY = "next-ai-draw-io-token-count"
+const STORAGE_TOKEN_DATE_KEY = "next-ai-draw-io-token-date"
+const STORAGE_TPM_COUNT_KEY = "next-ai-draw-io-tpm-count"
+const STORAGE_TPM_MINUTE_KEY = "next-ai-draw-io-tpm-minute"
 
 import { useDiagram } from "@/contexts/diagram-context"
 import { findCachedResponse } from "@/lib/cached-responses"
@@ -98,6 +102,8 @@ export default function ChatPanel({
     const [, setAccessCodeRequired] = useState(false)
     const [input, setInput] = useState("")
     const [dailyRequestLimit, setDailyRequestLimit] = useState(0)
+    const [dailyTokenLimit, setDailyTokenLimit] = useState(0)
+    const [tpmLimit, setTpmLimit] = useState(0)
 
     // Check config on mount
     useEffect(() => {
@@ -106,6 +112,8 @@ export default function ChatPanel({
             .then((data) => {
                 setAccessCodeRequired(data.accessCodeRequired)
                 setDailyRequestLimit(data.dailyRequestLimit || 0)
+                setDailyTokenLimit(data.dailyTokenLimit || 0)
+                setTpmLimit(data.tpmLimit || 0)
             })
             .catch(() => setAccessCodeRequired(false))
     }, [])
@@ -148,7 +156,7 @@ export default function ChatPanel({
         localStorage.setItem(STORAGE_REQUEST_COUNT_KEY, String(count + 1))
     }, [])
 
-    // Helper to show quota limit toast
+    // Helper to show quota limit toast (request-based)
    const showQuotaLimitToast = useCallback(() => {
        toast.custom(
            (t) => (
@@ -162,6 +170,136 @@
         )
     }, [dailyRequestLimit])
 
+    // Helper to check daily token limit (checks if already over limit)
+    const checkTokenLimit = useCallback((): {
+        allowed: boolean
+        remaining: number
+        used: number
+    } => {
+        if (dailyTokenLimit <= 0)
+            return { allowed: true, remaining: -1, used: 0 }
+
+        const today = new Date().toDateString()
+        const storedDate = localStorage.getItem(STORAGE_TOKEN_DATE_KEY)
+        let count = parseInt(
+            localStorage.getItem(STORAGE_TOKEN_COUNT_KEY) || "0",
+            10,
+        )
+
+        // Guard against NaN (e.g., if "NaN" was stored)
+        if (Number.isNaN(count)) count = 0
+
+        if (storedDate !== today) {
+            count = 0
+            localStorage.setItem(STORAGE_TOKEN_DATE_KEY, today)
+            localStorage.setItem(STORAGE_TOKEN_COUNT_KEY, "0")
+        }
+
+        return {
+            allowed: count < dailyTokenLimit,
+            remaining: dailyTokenLimit - count,
+            used: count,
+        }
+    }, [dailyTokenLimit])
+
+    // Helper to increment token count
+    const incrementTokenCount = useCallback((tokens: number): void => {
+        // Guard against NaN tokens
+        if (!Number.isFinite(tokens) || tokens <= 0) return
+
+        let count = parseInt(
+            localStorage.getItem(STORAGE_TOKEN_COUNT_KEY) || "0",
+            10,
+        )
+        // Guard against NaN count
+        if (Number.isNaN(count)) count = 0
+
+        localStorage.setItem(STORAGE_TOKEN_COUNT_KEY, String(count + tokens))
+    }, [])
+
+    // Helper to show token limit toast
+    const showTokenLimitToast = useCallback(
+        (used: number) => {
+            toast.custom(
+                (t) => (
+                    <QuotaLimitToast
+                        type="token"
+                        used={used}
+                        limit={dailyTokenLimit}
+                        onDismiss={() => toast.dismiss(t)}
+                    />
+                ),
+                { duration: 15000 },
+            )
+        },
+        [dailyTokenLimit],
+    )
+
+    // Helper to check TPM (tokens per minute) limit
+    // Note: This only READS, doesn't write. incrementTPMCount handles writes.
+    const checkTPMLimit = useCallback((): {
+        allowed: boolean
+        remaining: number
+        used: number
+    } => {
+        if (tpmLimit <= 0) return { allowed: true, remaining: -1, used: 0 }
+
+        const currentMinute = Math.floor(Date.now() / 60000).toString()
+        const storedMinute = localStorage.getItem(STORAGE_TPM_MINUTE_KEY)
+        let count = parseInt(
+            localStorage.getItem(STORAGE_TPM_COUNT_KEY) || "0",
+            10,
+        )
+
+        // Guard against NaN
+        if (Number.isNaN(count)) count = 0
+
+        // If we're in a new minute, treat count as 0 (will be reset on next increment)
+        if (storedMinute !== currentMinute) {
+            count = 0
+        }
+
+        return {
+            allowed: count < tpmLimit,
+            remaining: tpmLimit - count,
+            used: count,
+        }
+    }, [tpmLimit])
+
+    // Helper to increment TPM count
+    const incrementTPMCount = useCallback((tokens: number): void => {
+        // Guard against NaN tokens
+        if (!Number.isFinite(tokens) || tokens <= 0) return
+
+        const currentMinute = Math.floor(Date.now() / 60000).toString()
+        const storedMinute = localStorage.getItem(STORAGE_TPM_MINUTE_KEY)
+        let count = parseInt(
+            localStorage.getItem(STORAGE_TPM_COUNT_KEY) || "0",
+            10,
+        )
+
+        // Guard against NaN
+        if (Number.isNaN(count)) count = 0
+
+        // Reset if we're in a new minute
+        if (storedMinute !== currentMinute) {
+            count = 0
+            localStorage.setItem(STORAGE_TPM_MINUTE_KEY, currentMinute)
+        }
+
+        localStorage.setItem(STORAGE_TPM_COUNT_KEY, String(count + tokens))
+    }, [])
+
+    // Helper to show TPM limit toast
+    const showTPMLimitToast = useCallback(() => {
+        const limitDisplay =
+            tpmLimit >= 1000 ? `${tpmLimit / 1000}k` : String(tpmLimit)
+        toast.error(
+            `Rate limit reached (${limitDisplay} tokens/min). Please wait 60 seconds before sending another request.`,
+            { duration: 8000 },
+        )
+    }, [tpmLimit])
+
     // Generate a unique session ID for Langfuse tracing (restore from localStorage if available)
     const [sessionId, setSessionId] = useState(() => {
         if (typeof window !== "undefined") {
@@ -341,6 +479,26 @@ Please retry with an adjusted search pattern or use display_diagram if retries a
                 setShowSettingsDialog(true)
             }
         },
+        onFinish: ({ message }) => {
+            // Track actual token usage from server metadata
+            const metadata = message?.metadata as
+                | Record<string, unknown>
+                | undefined
+            if (metadata) {
+                // Use Number.isFinite to guard against NaN (typeof NaN === 'number' is true)
+                const inputTokens = Number.isFinite(metadata.inputTokens)
+                    ? (metadata.inputTokens as number)
+                    : 0
+                const outputTokens = Number.isFinite(metadata.outputTokens)
+                    ? (metadata.outputTokens as number)
+                    : 0
+                const actualTokens = inputTokens + outputTokens
+                if (actualTokens > 0) {
+                    incrementTokenCount(actualTokens)
+                    incrementTPMCount(actualTokens)
+                }
+            }
+        },
         // Auto-resubmit when all tool results are available (including errors)
         // This enables the model to retry when a tool returns an error
         sendAutomaticallyWhen: lastAssistantMessageIsCompleteWithToolCalls,
@@ -585,6 +743,20 @@ Please retry with an adjusted search pattern or use display_diagram if retries a
             return
         }
 
+        // Check daily token limit (actual usage tracked after response)
+        const tokenLimitCheck = checkTokenLimit()
+        if (!tokenLimitCheck.allowed) {
+            showTokenLimitToast(tokenLimitCheck.used)
+            return
+        }
+
+        // Check TPM (tokens per minute) limit
+        const tpmCheck = checkTPMLimit()
+        if (!tpmCheck.allowed) {
+            showTPMLimitToast()
+            return
+        }
+
         const accessCode = localStorage.getItem(STORAGE_ACCESS_CODE_KEY) || ""
 
         sendMessage(
@@ -601,6 +773,7 @@ Please retry with an adjusted search pattern or use display_diagram if retries a
             )
 
             incrementRequestCount()
+            // Token count is tracked in onFinish with actual server usage
             setInput("")
             setFiles([])
         } catch (error) {
@@ -679,6 +852,20 @@ Please retry with an adjusted search pattern or use display_diagram if retries a
             return
         }
 
+        // Check daily token limit (actual usage tracked after response)
+        const tokenLimitCheck = checkTokenLimit()
+        if (!tokenLimitCheck.allowed) {
+            showTokenLimitToast(tokenLimitCheck.used)
+            return
+        }
+
+        // Check TPM (tokens per minute) limit
+        const tpmCheck = checkTPMLimit()
+        if (!tpmCheck.allowed) {
+            showTPMLimitToast()
+            return
+        }
+
         // Now send the message after state is guaranteed to be updated
         const accessCode = localStorage.getItem(STORAGE_ACCESS_CODE_KEY) || ""
         sendMessage(
@@ -695,6 +882,7 @@ Please retry with an adjusted search pattern or use display_diagram if retries a
         )
 
         incrementRequestCount()
+        // Token count is tracked in onFinish with actual server usage
     }
 
     const handleEditMessage = async (messageIndex: number, newText: string) => {
@@ -750,6 +938,20 @@ Please retry with an adjusted search pattern or use display_diagram if retries a
             return
         }
 
+        // Check daily token limit (actual usage tracked after response)
+        const tokenLimitCheck = checkTokenLimit()
+        if (!tokenLimitCheck.allowed) {
+            showTokenLimitToast(tokenLimitCheck.used)
+            return
+        }
+
+        // Check TPM (tokens per minute) limit
+        const tpmCheck = checkTPMLimit()
+        if (!tpmCheck.allowed) {
+            showTPMLimitToast()
+            return
+        }
+
         // Now send the edited message after state is guaranteed to be updated
         const accessCode = localStorage.getItem(STORAGE_ACCESS_CODE_KEY) || ""
         sendMessage(
@@ -766,6 +968,7 @@ Please retry with an adjusted search pattern or use display_diagram if retries a
         )
 
         incrementRequestCount()
+        // Token count is tracked in onFinish with actual server usage
     }
 
     // Collapsed view (desktop only)
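The TPM pair above is a fixed-window rate limiter keyed by the epoch minute: `Math.floor(Date.now() / 60000)` names the current window, and a stored count is only trusted while the stored window id still matches. The check is deliberately read-only; the write happens in `incrementTPMCount` once `onFinish` reports real usage, since the true token cost is unknown before the response completes. A standalone sketch of the same pattern with illustrative names (the real code inlines this in `chat-panel.tsx`, and all of it is per-browser and best-effort, since a user can clear localStorage):

```ts
// Fixed-window counter over localStorage. checkWindowLimit() is the read side
// (called before sending); addToWindow() is the write side (called once the
// actual cost is known). Keys and names here are illustrative.
const COUNT_KEY = "tpm-count"
const WINDOW_KEY = "tpm-window"

function currentWindow(): string {
    return Math.floor(Date.now() / 60000).toString() // epoch-minute id
}

function storedCount(): number {
    const raw = parseInt(localStorage.getItem(COUNT_KEY) || "0", 10)
    return Number.isNaN(raw) ? 0 : raw
}

function checkWindowLimit(limit: number): boolean {
    if (limit <= 0) return true // 0 means the limit is disabled
    // A stale window id means the stored count belongs to a past minute.
    const count =
        localStorage.getItem(WINDOW_KEY) === currentWindow() ? storedCount() : 0
    return count < limit
}

function addToWindow(tokens: number): void {
    if (!Number.isFinite(tokens) || tokens <= 0) return
    const windowId = currentWindow()
    let count = storedCount()
    if (localStorage.getItem(WINDOW_KEY) !== windowId) {
        count = 0 // new minute: reset the counter before adding
        localStorage.setItem(WINDOW_KEY, windowId)
    }
    localStorage.setItem(COUNT_KEY, String(count + tokens))
}
```

The daily counters work the same way, with `new Date().toDateString()` as the window id.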
diff --git a/components/quota-limit-toast.tsx b/components/quota-limit-toast.tsx
index 936e6ef..a8a6988 100644
--- a/components/quota-limit-toast.tsx
+++ b/components/quota-limit-toast.tsx
@@ -5,16 +5,21 @@ import type React from "react"
 import { FaGithub } from "react-icons/fa"
 
 interface QuotaLimitToastProps {
+    type?: "request" | "token"
     used: number
     limit: number
     onDismiss: () => void
 }
 
 export function QuotaLimitToast({
+    type = "request",
     used,
     limit,
     onDismiss,
 }: QuotaLimitToastProps) {
+    const isTokenLimit = type === "token"
+    const formatNumber = (n: number) =>
+        n >= 1000 ? `${(n / 1000).toFixed(1)}k` : n.toString()
     const handleKeyDown = (e: React.KeyboardEvent) => {
         if (e.key === "Escape") {
             e.preventDefault()
             onDismiss()
         }
     }
@@ -48,19 +53,24 @@ export function QuotaLimitToast({
                 />
 
-                    Daily Quota Reached
+                    {isTokenLimit
+                        ? "Daily Token Limit Reached"
+                        : "Daily Quota Reached"}
 
-                    {used}/{limit}
+                    {isTokenLimit
+                        ? `${formatNumber(used)}/${formatNumber(limit)} tokens`
+                        : `${used}/${limit}`}
 
             {/* Message */}
 
-                    Oops — you've reached the daily API limit for this demo! As
-                    an indie developer covering all the API costs myself, I have
-                    to set these limits to keep things sustainable.
+                    Oops — you've reached the daily{" "}
+                    {isTokenLimit ? "token" : "API"} limit for this demo! As an
+                    indie developer covering all the API costs myself, I have to
+                    set these limits to keep things sustainable.
 
                     The good news is that you can self-host the project in
diff --git a/lib/token-counter.ts b/lib/token-counter.ts
index 6531228..1f9b006 100644
--- a/lib/token-counter.ts
+++ b/lib/token-counter.ts
@@ -1,21 +1,22 @@
 /**
- * Token counting utilities using Anthropic's tokenizer
+ * Token counting utilities using js-tiktoken
  *
- * This file is separate from system-prompts.ts because the @anthropic-ai/tokenizer
- * package uses WebAssembly which doesn't work well with Next.js server-side rendering.
- * Import this file only in scripts or client-side code, not in API routes.
+ * Uses the o200k_base encoding (via "gpt-4o"), close to Claude's tokenization.
+ * This is a pure JavaScript implementation, no WASM required.
  */
 
-import { countTokens } from "@anthropic-ai/tokenizer"
+import { encodingForModel } from "js-tiktoken"
 import { DEFAULT_SYSTEM_PROMPT, EXTENDED_SYSTEM_PROMPT } from "./system-prompts"
 
+const encoder = encodingForModel("gpt-4o")
+
 /**
- * Count the number of tokens in a text string using Anthropic's tokenizer
+ * Count the number of tokens in a text string
  * @param text - The text to count tokens for
  * @returns The number of tokens
  */
 export function countTextTokens(text: string): number {
-    return countTokens(text)
+    return encoder.encode(text).length
 }
 
 /**
@@ -28,8 +29,8 @@ export function getSystemPromptTokenCounts(): {
     extended: number
     additions: number
 } {
-    const defaultTokens = countTokens(DEFAULT_SYSTEM_PROMPT)
-    const extendedTokens = countTokens(EXTENDED_SYSTEM_PROMPT)
+    const defaultTokens = countTextTokens(DEFAULT_SYSTEM_PROMPT)
+    const extendedTokens = countTextTokens(EXTENDED_SYSTEM_PROMPT)
     return {
         default: defaultTokens,
         extended: extendedTokens,
diff --git a/package-lock.json b/package-lock.json
index 7e0fea0..162c312 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -37,6 +37,7 @@
                 "base-64": "^1.0.0",
                 "class-variance-authority": "^0.7.1",
                 "clsx": "^2.1.1",
+                "js-tiktoken": "^1.0.21",
                 "jsdom": "^26.0.0",
                 "lucide-react": "^0.483.0",
                 "next": "^16.0.7",
@@ -6290,6 +6291,26 @@
             "integrity": "sha512-kwDPIFCGx0NZHog36dj+tHiwP4QMzsZ3AgMViUBKI0+V5n4U0ufTCUMhnQ04diaRI8EX/QcPfql7zlhZ7j4zgg==",
             "license": "MIT"
         },
+        "node_modules/base64-js": {
+            "version": "1.5.1",
+            "resolved": "https://registry.npmjs.org/base64-js/-/base64-js-1.5.1.tgz",
+            "integrity": "sha512-AKpaYlHn8t4SVbOHCy+b5+KKgvR4vrsD8vbvrbiQJps7fKDTkjkDry6ji0rUJjC0kzbNePLwzxq8iypo41qeWA==",
+            "funding": [
+                {
+                    "type": "github",
+                    "url": "https://github.com/sponsors/feross"
+                },
+                {
+                    "type": "patreon",
+                    "url": "https://www.patreon.com/feross"
+                },
+                {
+                    "type": "consulting",
+                    "url": "https://feross.org/support"
+                }
+            ],
+            "license": "MIT"
+        },
         "node_modules/baseline-browser-mapping": {
             "version": "2.8.31",
             "resolved": "https://registry.npmjs.org/baseline-browser-mapping/-/baseline-browser-mapping-2.8.31.tgz",
@@ -8851,6 +8872,15 @@
                 "jiti": "lib/jiti-cli.mjs"
             }
         },
+        "node_modules/js-tiktoken": {
+            "version": "1.0.21",
+            "resolved": "https://registry.npmjs.org/js-tiktoken/-/js-tiktoken-1.0.21.tgz",
+            "integrity": "sha512-biOj/6M5qdgx5TKjDnFT1ymSpM5tbd3ylwDtrQvFQSu0Z7bBYko2dF+W/aUkXUPuk6IVpRxk/3Q2sHOzGlS36g==",
+            "license": "MIT",
+            "dependencies": {
+                "base64-js": "^1.5.1"
+            }
+        },
         "node_modules/js-tokens": {
             "version": "4.0.0",
             "resolved": "https://registry.npmjs.org/js-tokens/-/js-tokens-4.0.0.tgz",
diff --git a/package.json b/package.json
index 8bfe210..0e479ab 100644
--- a/package.json
+++ b/package.json
@@ -41,6 +41,7 @@
         "base-64": "^1.0.0",
         "class-variance-authority": "^0.7.1",
         "clsx": "^2.1.1",
+        "js-tiktoken": "^1.0.21",
         "jsdom": "^26.0.0",
         "lucide-react": "^0.483.0",
         "next": "^16.0.7",