feat: add daily token limit with actual usage tracking (#171)

* feat: add daily token limit with actual usage tracking

- Add DAILY_TOKEN_LIMIT env var for configurable daily token limit
- Track actual tokens from Bedrock API response metadata (not estimates)
- Server sends inputTokens + cachedInputTokens + outputTokens via messageMetadata
- Client increments token count in onFinish callback with actual usage (see the sketch after this list)
- Add NaN guards to prevent corrupted localStorage values
- Add token limit toast notification with quota display
- Remove client-side token estimation (was blocking legitimate requests)
- Switch to js-tiktoken for client compatibility (pure JS, no WASM)
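
A minimal sketch of the client-side accounting described above, under stated assumptions: the storage key, helper names, and metadata shape here are illustrative, not the repo's actual identifiers; only the inputTokens + cachedInputTokens + outputTokens sum, the onFinish hook, and the NaN guard come from the commit.

// Illustrative only: key and helper names are assumed.
const DAILY_TOKEN_KEY = "dailyTokenCount"

function readStoredCount(key: string): number {
  const value = Number(localStorage.getItem(key))
  return Number.isFinite(value) ? value : 0 // NaN guard against corrupted localStorage
}

// Called from the chat onFinish callback with the actual usage the server
// attached via messageMetadata.
function recordDailyUsage(meta: {
  inputTokens: number
  cachedInputTokens: number
  outputTokens: number
}): void {
  const used = meta.inputTokens + meta.cachedInputTokens + meta.outputTokens
  localStorage.setItem(DAILY_TOKEN_KEY, String(readStoredCount(DAILY_TOKEN_KEY) + used))
}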

* feat: add TPM (tokens per minute) rate limiting

- Add 50k tokens/min client-side rate limit
- Track tokens per minute with automatic minute rollover (sketched after this list)
- Check TPM limit after daily limits pass
- Show toast when rate limit reached
- Add NaN guards for localStorage values
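
A sketch of the per-minute rollover, with the same caveat that the storage keys are hypothetical:

// Illustrative per-minute tracking with automatic rollover.
const TPM_COUNT_KEY = "tpmCount"
const TPM_MINUTE_KEY = "tpmMinute"

function incrementTPMCount(tokens: number): void {
  const minute = Math.floor(Date.now() / 60_000)
  const storedMinute = Number(localStorage.getItem(TPM_MINUTE_KEY))
  const storedCount = Number(localStorage.getItem(TPM_COUNT_KEY))
  // A new minute (or a corrupted value) starts a fresh window; NaN guard included.
  const count =
    storedMinute === minute && Number.isFinite(storedCount) ? storedCount : 0
  localStorage.setItem(TPM_MINUTE_KEY, String(minute))
  localStorage.setItem(TPM_COUNT_KEY, String(count + tokens))
}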

* feat: make TPM limit configurable via TPM_LIMIT env var
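
Presumably resolved along these lines; how the value reaches the client (e.g. a NEXT_PUBLIC_ prefix in Next.js) is an assumption not shown in this page:

// Illustrative: read the limit from TPM_LIMIT, defaulting to 50k tokens/min.
const parsed = Number(process.env.TPM_LIMIT)
const TPM_LIMIT = Number.isFinite(parsed) && parsed > 0 ? parsed : 50_000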

* chore: restore cache debug logs

* fix: prevent race condition in TPM tracking

checkTPMLimit was resetting the TPM count to 0 when checking, which
overwrote the count saved by incrementTPMCount. Now checkTPMLimit
only reads, and incrementTPMCount handles all writes.
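
The fixed shape, sketched with the same hypothetical keys as above (the check reads only; all writes stay in incrementTPMCount):

// Illustrative read-only check; writing the reset back here was the race.
function checkTPMLimit(limit: number): boolean {
  const minute = Math.floor(Date.now() / 60_000)
  const storedMinute = Number(localStorage.getItem(TPM_MINUTE_KEY))
  const storedCount = Number(localStorage.getItem(TPM_COUNT_KEY))
  // An expired window counts as 0, but is NOT written back here.
  const count =
    storedMinute === minute && Number.isFinite(storedCount) ? storedCount : 0
  return count < limit
}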

* chore: improve TPM limit error message clarity
Author: Dayuan Jiang
Date: 2025-12-08 18:56:34 +09:00
Committed by: GitHub
Parent: 728dda5267
Commit: 622829b903
7 changed files with 285 additions and 66 deletions


@@ -1,21 +1,22 @@
 /**
- * Token counting utilities using Anthropic's tokenizer
+ * Token counting utilities using js-tiktoken
  *
- * This file is separate from system-prompts.ts because the @anthropic-ai/tokenizer
- * package uses WebAssembly which doesn't work well with Next.js server-side rendering.
- * Import this file only in scripts or client-side code, not in API routes.
+ * Uses cl100k_base encoding (GPT-4) which is close to Claude's tokenization.
+ * This is a pure JavaScript implementation, no WASM required.
  */
 
-import { countTokens } from "@anthropic-ai/tokenizer"
+import { encodingForModel } from "js-tiktoken"
 import { DEFAULT_SYSTEM_PROMPT, EXTENDED_SYSTEM_PROMPT } from "./system-prompts"
 
+const encoder = encodingForModel("gpt-4o")
+
 /**
- * Count the number of tokens in a text string using Anthropic's tokenizer
+ * Count the number of tokens in a text string
  * @param text - The text to count tokens for
  * @returns The number of tokens
  */
 export function countTextTokens(text: string): number {
-  return countTokens(text)
+  return encoder.encode(text).length
 }
 
 /**
@@ -28,8 +29,8 @@ export function getSystemPromptTokenCounts(): {
   extended: number
   additions: number
 } {
-  const defaultTokens = countTokens(DEFAULT_SYSTEM_PROMPT)
-  const extendedTokens = countTokens(EXTENDED_SYSTEM_PROMPT)
+  const defaultTokens = countTextTokens(DEFAULT_SYSTEM_PROMPT)
+  const extendedTokens = countTextTokens(EXTENDED_SYSTEM_PROMPT)
   return {
     default: defaultTokens,
     extended: extendedTokens,
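
For reference, the rewritten counter can be exercised as below. The import path is an assumption (the file name is not shown on this page); encodingForModel and encode are js-tiktoken's actual API.

import { countTextTokens } from "./token-counter" // path assumed

console.log(countTextTokens("Hello, world!")) // a small positive integer, e.g. 4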