mirror of
https://github.com/DayuanJiang/next-ai-draw-io.git
synced 2026-01-02 14:22:28 +08:00
feat: add daily token limit with actual usage tracking (#171)
* feat: add daily token limit with actual usage tracking
  - Add DAILY_TOKEN_LIMIT env var for configurable daily token limit
  - Track actual tokens from Bedrock API response metadata (not estimates)
  - Server sends inputTokens + cachedInputTokens + outputTokens via messageMetadata
  - Client increments token count in onFinish callback with actual usage
  - Add NaN guards to prevent corrupted localStorage values
  - Add token limit toast notification with quota display
  - Remove client-side token estimation (was blocking legitimate requests)
  - Switch to js-tiktoken for client compatibility (pure JS, no WASM)
* feat: add TPM (tokens per minute) rate limiting
  - Add 50k tokens/min client-side rate limit
  - Track tokens per minute with automatic minute rollover
  - Check TPM limit after daily limits pass
  - Show toast when rate limit reached
  - NaN guards for localStorage values
* feat: make TPM limit configurable via TPM_LIMIT env var
* chore: restore cache debug logs
* fix: prevent race condition in TPM tracking
  checkTPMLimit was resetting TPM count to 0 when checking, which overwrote the count saved by incrementTPMCount. Now checkTPMLimit only reads and incrementTPMCount handles all writes.
* chore: improve TPM limit error message clarity
This commit is contained in:
@@ -189,32 +189,11 @@ async function handleChatRequest(req: Request): Promise<Response> {
|
|||||||
const textPart = lastMessage.parts?.find((p: any) => p.type === "text")
|
const textPart = lastMessage.parts?.find((p: any) => p.type === "text")
|
||||||
const filePart = lastMessage.parts?.find((p: any) => p.type === "file")
|
const filePart = lastMessage.parts?.find((p: any) => p.type === "file")
|
||||||
|
|
||||||
console.log("[Cache DEBUG] textPart?.text:", textPart?.text)
|
|
||||||
console.log("[Cache DEBUG] hasFilePart:", !!filePart)
|
|
||||||
|
|
||||||
const cached = findCachedResponse(textPart?.text || "", !!filePart)
|
const cached = findCachedResponse(textPart?.text || "", !!filePart)
|
||||||
|
|
||||||
console.log("[Cache DEBUG] cached found:", !!cached)
|
|
||||||
|
|
||||||
if (cached) {
|
if (cached) {
|
||||||
console.log(
|
|
||||||
"[Cache] Returning cached response for:",
|
|
||||||
textPart?.text,
|
|
||||||
)
|
|
||||||
return createCachedStreamResponse(cached.xml)
|
return createCachedStreamResponse(cached.xml)
|
||||||
} else {
|
|
||||||
console.log("[Cache DEBUG] No cache match - checking why...")
|
|
||||||
console.log(
|
|
||||||
"[Cache DEBUG] Exact promptText:",
|
|
||||||
JSON.stringify(textPart?.text),
|
|
||||||
)
|
|
||||||
}
|
}
|
||||||
} else {
|
|
||||||
console.log("[Cache DEBUG] Skipping cache check - conditions not met")
|
|
||||||
if (!isFirstMessage)
|
|
||||||
console.log("[Cache DEBUG] Reason: not first message")
|
|
||||||
if (!isEmptyDiagram)
|
|
||||||
console.log("[Cache DEBUG] Reason: diagram not empty")
|
|
||||||
}
|
}
|
||||||
// === CACHE CHECK END ===
|
// === CACHE CHECK END ===
|
||||||
|
|
||||||
@@ -243,28 +222,6 @@ ${lastMessageText}
|
|||||||
// Convert UIMessages to ModelMessages and add system message
|
// Convert UIMessages to ModelMessages and add system message
|
||||||
const modelMessages = convertToModelMessages(messages)
|
const modelMessages = convertToModelMessages(messages)
|
||||||
|
|
||||||
// Debug: log raw messages to see what's coming in
|
|
||||||
console.log(
|
|
||||||
"[DEBUG] Raw UI messages:",
|
|
||||||
JSON.stringify(
|
|
||||||
messages.map((m: any, i: number) => ({
|
|
||||||
index: i,
|
|
||||||
role: m.role,
|
|
||||||
partsCount: m.parts?.length,
|
|
||||||
parts: m.parts?.map((p: any) => ({
|
|
||||||
type: p.type,
|
|
||||||
toolName: p.toolName,
|
|
||||||
toolCallId: p.toolCallId,
|
|
||||||
state: p.state,
|
|
||||||
inputType: p.input ? typeof p.input : undefined,
|
|
||||||
input: p.input,
|
|
||||||
})),
|
|
||||||
})),
|
|
||||||
null,
|
|
||||||
2,
|
|
||||||
),
|
|
||||||
)
|
|
||||||
|
|
||||||
// Fix tool call inputs for Bedrock API (requires JSON objects, not strings)
|
// Fix tool call inputs for Bedrock API (requires JSON objects, not strings)
|
||||||
const fixedMessages = fixToolCallInputs(modelMessages)
|
const fixedMessages = fixToolCallInputs(modelMessages)
|
||||||
|
|
||||||
@@ -383,14 +340,8 @@ ${lastMessageText}
|
|||||||
}
|
}
|
||||||
return null
|
return null
|
||||||
},
|
},
|
||||||
onFinish: ({ text, usage, providerMetadata }) => {
|
onFinish: ({ text, usage }) => {
|
||||||
console.log(
|
|
||||||
"[Cache] Full providerMetadata:",
|
|
||||||
JSON.stringify(providerMetadata, null, 2),
|
|
||||||
)
|
|
||||||
console.log("[Cache] Usage:", JSON.stringify(usage, null, 2))
|
|
||||||
// Pass usage to Langfuse (Bedrock streaming doesn't auto-report tokens to telemetry)
|
// Pass usage to Langfuse (Bedrock streaming doesn't auto-report tokens to telemetry)
|
||||||
// AI SDK uses inputTokens/outputTokens, Langfuse expects promptTokens/completionTokens
|
|
||||||
setTraceOutput(text, {
|
setTraceOutput(text, {
|
||||||
promptTokens: usage?.inputTokens,
|
promptTokens: usage?.inputTokens,
|
||||||
completionTokens: usage?.outputTokens,
|
completionTokens: usage?.outputTokens,
|
||||||
@@ -476,7 +427,28 @@ IMPORTANT: Keep edits concise:
|
|||||||
}),
|
}),
|
||||||
})
|
})
|
||||||
|
|
||||||
return result.toUIMessageStreamResponse()
|
return result.toUIMessageStreamResponse({
|
||||||
|
messageMetadata: ({ part }) => {
|
||||||
|
if (part.type === "finish") {
|
||||||
|
const usage = (part as any).totalUsage
|
||||||
|
if (!usage) {
|
||||||
|
console.warn(
|
||||||
|
"[messageMetadata] No usage data in finish part",
|
||||||
|
)
|
||||||
|
return undefined
|
||||||
|
}
|
||||||
|
// Total input = non-cached + cached (these are separate counts)
|
||||||
|
// Note: cacheWriteInputTokens is not available on finish part
|
||||||
|
const totalInputTokens =
|
||||||
|
(usage.inputTokens ?? 0) + (usage.cachedInputTokens ?? 0)
|
||||||
|
return {
|
||||||
|
inputTokens: totalInputTokens,
|
||||||
|
outputTokens: usage.outputTokens ?? 0,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return undefined
|
||||||
|
},
|
||||||
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
// Wrap handler with error handling
|
// Wrap handler with error handling
|
||||||
|
|||||||
@@ -9,5 +9,7 @@ export async function GET() {
|
|||||||
return NextResponse.json({
|
return NextResponse.json({
|
||||||
accessCodeRequired: accessCodes.length > 0,
|
accessCodeRequired: accessCodes.length > 0,
|
||||||
dailyRequestLimit: parseInt(process.env.DAILY_REQUEST_LIMIT || "0", 10),
|
dailyRequestLimit: parseInt(process.env.DAILY_REQUEST_LIMIT || "0", 10),
|
||||||
|
dailyTokenLimit: parseInt(process.env.DAILY_TOKEN_LIMIT || "0", 10),
|
||||||
|
tpmLimit: parseInt(process.env.TPM_LIMIT || "0", 10),
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -33,6 +33,10 @@ const STORAGE_SESSION_ID_KEY = "next-ai-draw-io-session-id"
|
|||||||
const STORAGE_DIAGRAM_XML_KEY = "next-ai-draw-io-diagram-xml"
|
const STORAGE_DIAGRAM_XML_KEY = "next-ai-draw-io-diagram-xml"
|
||||||
const STORAGE_REQUEST_COUNT_KEY = "next-ai-draw-io-request-count"
|
const STORAGE_REQUEST_COUNT_KEY = "next-ai-draw-io-request-count"
|
||||||
const STORAGE_REQUEST_DATE_KEY = "next-ai-draw-io-request-date"
|
const STORAGE_REQUEST_DATE_KEY = "next-ai-draw-io-request-date"
|
||||||
|
const STORAGE_TOKEN_COUNT_KEY = "next-ai-draw-io-token-count"
|
||||||
|
const STORAGE_TOKEN_DATE_KEY = "next-ai-draw-io-token-date"
|
||||||
|
const STORAGE_TPM_COUNT_KEY = "next-ai-draw-io-tpm-count"
|
||||||
|
const STORAGE_TPM_MINUTE_KEY = "next-ai-draw-io-tpm-minute"
|
||||||
|
|
||||||
import { useDiagram } from "@/contexts/diagram-context"
|
import { useDiagram } from "@/contexts/diagram-context"
|
||||||
import { findCachedResponse } from "@/lib/cached-responses"
|
import { findCachedResponse } from "@/lib/cached-responses"
|
||||||
@@ -98,6 +102,8 @@ export default function ChatPanel({
|
|||||||
const [, setAccessCodeRequired] = useState(false)
|
const [, setAccessCodeRequired] = useState(false)
|
||||||
const [input, setInput] = useState("")
|
const [input, setInput] = useState("")
|
||||||
const [dailyRequestLimit, setDailyRequestLimit] = useState(0)
|
const [dailyRequestLimit, setDailyRequestLimit] = useState(0)
|
||||||
|
const [dailyTokenLimit, setDailyTokenLimit] = useState(0)
|
||||||
|
const [tpmLimit, setTpmLimit] = useState(0)
|
||||||
|
|
||||||
// Check config on mount
|
// Check config on mount
|
||||||
useEffect(() => {
|
useEffect(() => {
|
||||||
@@ -106,6 +112,8 @@ export default function ChatPanel({
|
|||||||
.then((data) => {
|
.then((data) => {
|
||||||
setAccessCodeRequired(data.accessCodeRequired)
|
setAccessCodeRequired(data.accessCodeRequired)
|
||||||
setDailyRequestLimit(data.dailyRequestLimit || 0)
|
setDailyRequestLimit(data.dailyRequestLimit || 0)
|
||||||
|
setDailyTokenLimit(data.dailyTokenLimit || 0)
|
||||||
|
setTpmLimit(data.tpmLimit || 0)
|
||||||
})
|
})
|
||||||
.catch(() => setAccessCodeRequired(false))
|
.catch(() => setAccessCodeRequired(false))
|
||||||
}, [])
|
}, [])
|
||||||
@@ -148,7 +156,7 @@ export default function ChatPanel({
|
|||||||
localStorage.setItem(STORAGE_REQUEST_COUNT_KEY, String(count + 1))
|
localStorage.setItem(STORAGE_REQUEST_COUNT_KEY, String(count + 1))
|
||||||
}, [])
|
}, [])
|
||||||
|
|
||||||
// Helper to show quota limit toast
|
// Helper to show quota limit toast (request-based)
|
||||||
const showQuotaLimitToast = useCallback(() => {
|
const showQuotaLimitToast = useCallback(() => {
|
||||||
toast.custom(
|
toast.custom(
|
||||||
(t) => (
|
(t) => (
|
||||||
@@ -162,6 +170,136 @@ export default function ChatPanel({
|
|||||||
)
|
)
|
||||||
}, [dailyRequestLimit])
|
}, [dailyRequestLimit])
|
||||||
|
|
||||||
|
// Helper to check daily token limit (checks if already over limit)
|
||||||
|
const checkTokenLimit = useCallback((): {
|
||||||
|
allowed: boolean
|
||||||
|
remaining: number
|
||||||
|
used: number
|
||||||
|
} => {
|
||||||
|
if (dailyTokenLimit <= 0)
|
||||||
|
return { allowed: true, remaining: -1, used: 0 }
|
||||||
|
|
||||||
|
const today = new Date().toDateString()
|
||||||
|
const storedDate = localStorage.getItem(STORAGE_TOKEN_DATE_KEY)
|
||||||
|
let count = parseInt(
|
||||||
|
localStorage.getItem(STORAGE_TOKEN_COUNT_KEY) || "0",
|
||||||
|
10,
|
||||||
|
)
|
||||||
|
|
||||||
|
// Guard against NaN (e.g., if "NaN" was stored)
|
||||||
|
if (Number.isNaN(count)) count = 0
|
||||||
|
|
||||||
|
if (storedDate !== today) {
|
||||||
|
count = 0
|
||||||
|
localStorage.setItem(STORAGE_TOKEN_DATE_KEY, today)
|
||||||
|
localStorage.setItem(STORAGE_TOKEN_COUNT_KEY, "0")
|
||||||
|
}
|
||||||
|
|
||||||
|
return {
|
||||||
|
allowed: count < dailyTokenLimit,
|
||||||
|
remaining: dailyTokenLimit - count,
|
||||||
|
used: count,
|
||||||
|
}
|
||||||
|
}, [dailyTokenLimit])
|
||||||
|
|
||||||
|
// Helper to increment token count
|
||||||
|
const incrementTokenCount = useCallback((tokens: number): void => {
|
||||||
|
// Guard against NaN tokens
|
||||||
|
if (!Number.isFinite(tokens) || tokens <= 0) return
|
||||||
|
|
||||||
|
let count = parseInt(
|
||||||
|
localStorage.getItem(STORAGE_TOKEN_COUNT_KEY) || "0",
|
||||||
|
10,
|
||||||
|
)
|
||||||
|
// Guard against NaN count
|
||||||
|
if (Number.isNaN(count)) count = 0
|
||||||
|
|
||||||
|
localStorage.setItem(STORAGE_TOKEN_COUNT_KEY, String(count + tokens))
|
||||||
|
}, [])
|
||||||
|
|
||||||
|
// Helper to show token limit toast
|
||||||
|
const showTokenLimitToast = useCallback(
|
||||||
|
(used: number) => {
|
||||||
|
toast.custom(
|
||||||
|
(t) => (
|
||||||
|
<QuotaLimitToast
|
||||||
|
type="token"
|
||||||
|
used={used}
|
||||||
|
limit={dailyTokenLimit}
|
||||||
|
onDismiss={() => toast.dismiss(t)}
|
||||||
|
/>
|
||||||
|
),
|
||||||
|
{ duration: 15000 },
|
||||||
|
)
|
||||||
|
},
|
||||||
|
[dailyTokenLimit],
|
||||||
|
)
|
||||||
|
|
||||||
|
// Helper to check TPM (tokens per minute) limit
|
||||||
|
// Note: This only READS, doesn't write. incrementTPMCount handles writes.
|
||||||
|
const checkTPMLimit = useCallback((): {
|
||||||
|
allowed: boolean
|
||||||
|
remaining: number
|
||||||
|
used: number
|
||||||
|
} => {
|
||||||
|
if (tpmLimit <= 0) return { allowed: true, remaining: -1, used: 0 }
|
||||||
|
|
||||||
|
const currentMinute = Math.floor(Date.now() / 60000).toString()
|
||||||
|
const storedMinute = localStorage.getItem(STORAGE_TPM_MINUTE_KEY)
|
||||||
|
let count = parseInt(
|
||||||
|
localStorage.getItem(STORAGE_TPM_COUNT_KEY) || "0",
|
||||||
|
10,
|
||||||
|
)
|
||||||
|
|
||||||
|
// Guard against NaN
|
||||||
|
if (Number.isNaN(count)) count = 0
|
||||||
|
|
||||||
|
// If we're in a new minute, treat count as 0 (will be reset on next increment)
|
||||||
|
if (storedMinute !== currentMinute) {
|
||||||
|
count = 0
|
||||||
|
}
|
||||||
|
|
||||||
|
return {
|
||||||
|
allowed: count < tpmLimit,
|
||||||
|
remaining: tpmLimit - count,
|
||||||
|
used: count,
|
||||||
|
}
|
||||||
|
}, [tpmLimit])
|
||||||
|
|
||||||
|
// Helper to increment TPM count
|
||||||
|
const incrementTPMCount = useCallback((tokens: number): void => {
|
||||||
|
// Guard against NaN tokens
|
||||||
|
if (!Number.isFinite(tokens) || tokens <= 0) return
|
||||||
|
|
||||||
|
const currentMinute = Math.floor(Date.now() / 60000).toString()
|
||||||
|
const storedMinute = localStorage.getItem(STORAGE_TPM_MINUTE_KEY)
|
||||||
|
let count = parseInt(
|
||||||
|
localStorage.getItem(STORAGE_TPM_COUNT_KEY) || "0",
|
||||||
|
10,
|
||||||
|
)
|
||||||
|
|
||||||
|
// Guard against NaN
|
||||||
|
if (Number.isNaN(count)) count = 0
|
||||||
|
|
||||||
|
// Reset if we're in a new minute
|
||||||
|
if (storedMinute !== currentMinute) {
|
||||||
|
count = 0
|
||||||
|
localStorage.setItem(STORAGE_TPM_MINUTE_KEY, currentMinute)
|
||||||
|
}
|
||||||
|
|
||||||
|
localStorage.setItem(STORAGE_TPM_COUNT_KEY, String(count + tokens))
|
||||||
|
}, [])
|
||||||
|
|
||||||
|
// Helper to show TPM limit toast
|
||||||
|
const showTPMLimitToast = useCallback(() => {
|
||||||
|
const limitDisplay =
|
||||||
|
tpmLimit >= 1000 ? `${tpmLimit / 1000}k` : String(tpmLimit)
|
||||||
|
toast.error(
|
||||||
|
`Rate limit reached (${limitDisplay} tokens/min). Please wait 60 seconds before sending another request.`,
|
||||||
|
{ duration: 8000 },
|
||||||
|
)
|
||||||
|
}, [tpmLimit])
|
||||||
|
|
||||||
// Generate a unique session ID for Langfuse tracing (restore from localStorage if available)
|
// Generate a unique session ID for Langfuse tracing (restore from localStorage if available)
|
||||||
const [sessionId, setSessionId] = useState(() => {
|
const [sessionId, setSessionId] = useState(() => {
|
||||||
if (typeof window !== "undefined") {
|
if (typeof window !== "undefined") {
|
||||||
@@ -341,6 +479,26 @@ Please retry with an adjusted search pattern or use display_diagram if retries a
|
|||||||
setShowSettingsDialog(true)
|
setShowSettingsDialog(true)
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
onFinish: ({ message }) => {
|
||||||
|
// Track actual token usage from server metadata
|
||||||
|
const metadata = message?.metadata as
|
||||||
|
| Record<string, unknown>
|
||||||
|
| undefined
|
||||||
|
if (metadata) {
|
||||||
|
// Use Number.isFinite to guard against NaN (typeof NaN === 'number' is true)
|
||||||
|
const inputTokens = Number.isFinite(metadata.inputTokens)
|
||||||
|
? (metadata.inputTokens as number)
|
||||||
|
: 0
|
||||||
|
const outputTokens = Number.isFinite(metadata.outputTokens)
|
||||||
|
? (metadata.outputTokens as number)
|
||||||
|
: 0
|
||||||
|
const actualTokens = inputTokens + outputTokens
|
||||||
|
if (actualTokens > 0) {
|
||||||
|
incrementTokenCount(actualTokens)
|
||||||
|
incrementTPMCount(actualTokens)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
// Auto-resubmit when all tool results are available (including errors)
|
// Auto-resubmit when all tool results are available (including errors)
|
||||||
// This enables the model to retry when a tool returns an error
|
// This enables the model to retry when a tool returns an error
|
||||||
sendAutomaticallyWhen: lastAssistantMessageIsCompleteWithToolCalls,
|
sendAutomaticallyWhen: lastAssistantMessageIsCompleteWithToolCalls,
|
||||||
@@ -585,6 +743,20 @@ Please retry with an adjusted search pattern or use display_diagram if retries a
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Check daily token limit (actual usage tracked after response)
|
||||||
|
const tokenLimitCheck = checkTokenLimit()
|
||||||
|
if (!tokenLimitCheck.allowed) {
|
||||||
|
showTokenLimitToast(tokenLimitCheck.used)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check TPM (tokens per minute) limit
|
||||||
|
const tpmCheck = checkTPMLimit()
|
||||||
|
if (!tpmCheck.allowed) {
|
||||||
|
showTPMLimitToast()
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
const accessCode =
|
const accessCode =
|
||||||
localStorage.getItem(STORAGE_ACCESS_CODE_KEY) || ""
|
localStorage.getItem(STORAGE_ACCESS_CODE_KEY) || ""
|
||||||
sendMessage(
|
sendMessage(
|
||||||
@@ -601,6 +773,7 @@ Please retry with an adjusted search pattern or use display_diagram if retries a
|
|||||||
)
|
)
|
||||||
|
|
||||||
incrementRequestCount()
|
incrementRequestCount()
|
||||||
|
// Token count is tracked in onFinish with actual server usage
|
||||||
setInput("")
|
setInput("")
|
||||||
setFiles([])
|
setFiles([])
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
@@ -679,6 +852,20 @@ Please retry with an adjusted search pattern or use display_diagram if retries a
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Check daily token limit (actual usage tracked after response)
|
||||||
|
const tokenLimitCheck = checkTokenLimit()
|
||||||
|
if (!tokenLimitCheck.allowed) {
|
||||||
|
showTokenLimitToast(tokenLimitCheck.used)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check TPM (tokens per minute) limit
|
||||||
|
const tpmCheck = checkTPMLimit()
|
||||||
|
if (!tpmCheck.allowed) {
|
||||||
|
showTPMLimitToast()
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
// Now send the message after state is guaranteed to be updated
|
// Now send the message after state is guaranteed to be updated
|
||||||
const accessCode = localStorage.getItem(STORAGE_ACCESS_CODE_KEY) || ""
|
const accessCode = localStorage.getItem(STORAGE_ACCESS_CODE_KEY) || ""
|
||||||
sendMessage(
|
sendMessage(
|
||||||
@@ -695,6 +882,7 @@ Please retry with an adjusted search pattern or use display_diagram if retries a
|
|||||||
)
|
)
|
||||||
|
|
||||||
incrementRequestCount()
|
incrementRequestCount()
|
||||||
|
// Token count is tracked in onFinish with actual server usage
|
||||||
}
|
}
|
||||||
|
|
||||||
const handleEditMessage = async (messageIndex: number, newText: string) => {
|
const handleEditMessage = async (messageIndex: number, newText: string) => {
|
||||||
@@ -750,6 +938,20 @@ Please retry with an adjusted search pattern or use display_diagram if retries a
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Check daily token limit (actual usage tracked after response)
|
||||||
|
const tokenLimitCheck = checkTokenLimit()
|
||||||
|
if (!tokenLimitCheck.allowed) {
|
||||||
|
showTokenLimitToast(tokenLimitCheck.used)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check TPM (tokens per minute) limit
|
||||||
|
const tpmCheck = checkTPMLimit()
|
||||||
|
if (!tpmCheck.allowed) {
|
||||||
|
showTPMLimitToast()
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
// Now send the edited message after state is guaranteed to be updated
|
// Now send the edited message after state is guaranteed to be updated
|
||||||
const accessCode = localStorage.getItem(STORAGE_ACCESS_CODE_KEY) || ""
|
const accessCode = localStorage.getItem(STORAGE_ACCESS_CODE_KEY) || ""
|
||||||
sendMessage(
|
sendMessage(
|
||||||
@@ -766,6 +968,7 @@ Please retry with an adjusted search pattern or use display_diagram if retries a
|
|||||||
)
|
)
|
||||||
|
|
||||||
incrementRequestCount()
|
incrementRequestCount()
|
||||||
|
// Token count is tracked in onFinish with actual server usage
|
||||||
}
|
}
|
||||||
|
|
||||||
// Collapsed view (desktop only)
|
// Collapsed view (desktop only)
|
||||||
|
|||||||
@@ -5,16 +5,21 @@ import type React from "react"
|
|||||||
import { FaGithub } from "react-icons/fa"
|
import { FaGithub } from "react-icons/fa"
|
||||||
|
|
||||||
interface QuotaLimitToastProps {
|
interface QuotaLimitToastProps {
|
||||||
|
type?: "request" | "token"
|
||||||
used: number
|
used: number
|
||||||
limit: number
|
limit: number
|
||||||
onDismiss: () => void
|
onDismiss: () => void
|
||||||
}
|
}
|
||||||
|
|
||||||
export function QuotaLimitToast({
|
export function QuotaLimitToast({
|
||||||
|
type = "request",
|
||||||
used,
|
used,
|
||||||
limit,
|
limit,
|
||||||
onDismiss,
|
onDismiss,
|
||||||
}: QuotaLimitToastProps) {
|
}: QuotaLimitToastProps) {
|
||||||
|
const isTokenLimit = type === "token"
|
||||||
|
const formatNumber = (n: number) =>
|
||||||
|
n >= 1000 ? `${(n / 1000).toFixed(1)}k` : n.toString()
|
||||||
const handleKeyDown = (e: React.KeyboardEvent) => {
|
const handleKeyDown = (e: React.KeyboardEvent) => {
|
||||||
if (e.key === "Escape") {
|
if (e.key === "Escape") {
|
||||||
e.preventDefault()
|
e.preventDefault()
|
||||||
@@ -48,19 +53,24 @@ export function QuotaLimitToast({
|
|||||||
/>
|
/>
|
||||||
</div>
|
</div>
|
||||||
<h3 className="font-semibold text-foreground text-sm">
|
<h3 className="font-semibold text-foreground text-sm">
|
||||||
Daily Quota Reached
|
{isTokenLimit
|
||||||
|
? "Daily Token Limit Reached"
|
||||||
|
: "Daily Quota Reached"}
|
||||||
</h3>
|
</h3>
|
||||||
<span className="px-2 py-0.5 text-xs font-medium rounded-md bg-muted text-muted-foreground">
|
<span className="px-2 py-0.5 text-xs font-medium rounded-md bg-muted text-muted-foreground">
|
||||||
{used}/{limit}
|
{isTokenLimit
|
||||||
|
? `${formatNumber(used)}/${formatNumber(limit)} tokens`
|
||||||
|
: `${used}/${limit}`}
|
||||||
</span>
|
</span>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
{/* Message */}
|
{/* Message */}
|
||||||
<div className="text-sm text-muted-foreground leading-relaxed mb-4 space-y-2">
|
<div className="text-sm text-muted-foreground leading-relaxed mb-4 space-y-2">
|
||||||
<p>
|
<p>
|
||||||
Oops — you've reached the daily API limit for this demo! As
|
Oops — you've reached the daily{" "}
|
||||||
an indie developer covering all the API costs myself, I have
|
{isTokenLimit ? "token" : "API"} limit for this demo! As an
|
||||||
to set these limits to keep things sustainable.
|
indie developer covering all the API costs myself, I have to
|
||||||
|
set these limits to keep things sustainable.
|
||||||
</p>
|
</p>
|
||||||
<p>
|
<p>
|
||||||
The good news is that you can self-host the project in
|
The good news is that you can self-host the project in
|
||||||
|
|||||||
@@ -1,21 +1,22 @@
|
|||||||
/**
|
/**
|
||||||
* Token counting utilities using Anthropic's tokenizer
|
* Token counting utilities using js-tiktoken
|
||||||
*
|
*
|
||||||
* This file is separate from system-prompts.ts because the @anthropic-ai/tokenizer
|
* Uses the o200k_base encoding (selected via the gpt-4o model below), which only approximates Claude's tokenization.
|
||||||
* package uses WebAssembly which doesn't work well with Next.js server-side rendering.
|
* This is a pure JavaScript implementation, no WASM required.
|
||||||
* Import this file only in scripts or client-side code, not in API routes.
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import { countTokens } from "@anthropic-ai/tokenizer"
|
import { encodingForModel } from "js-tiktoken"
|
||||||
import { DEFAULT_SYSTEM_PROMPT, EXTENDED_SYSTEM_PROMPT } from "./system-prompts"
|
import { DEFAULT_SYSTEM_PROMPT, EXTENDED_SYSTEM_PROMPT } from "./system-prompts"
|
||||||
|
|
||||||
|
const encoder = encodingForModel("gpt-4o")
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Count the number of tokens in a text string using Anthropic's tokenizer
|
* Count the number of tokens in a text string
|
||||||
* @param text - The text to count tokens for
|
* @param text - The text to count tokens for
|
||||||
* @returns The number of tokens
|
* @returns The number of tokens
|
||||||
*/
|
*/
|
||||||
export function countTextTokens(text: string): number {
|
export function countTextTokens(text: string): number {
|
||||||
return countTokens(text)
|
return encoder.encode(text).length
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -28,8 +29,8 @@ export function getSystemPromptTokenCounts(): {
|
|||||||
extended: number
|
extended: number
|
||||||
additions: number
|
additions: number
|
||||||
} {
|
} {
|
||||||
const defaultTokens = countTokens(DEFAULT_SYSTEM_PROMPT)
|
const defaultTokens = countTextTokens(DEFAULT_SYSTEM_PROMPT)
|
||||||
const extendedTokens = countTokens(EXTENDED_SYSTEM_PROMPT)
|
const extendedTokens = countTextTokens(EXTENDED_SYSTEM_PROMPT)
|
||||||
return {
|
return {
|
||||||
default: defaultTokens,
|
default: defaultTokens,
|
||||||
extended: extendedTokens,
|
extended: extendedTokens,
|
||||||
|
|||||||
30
package-lock.json
generated
30
package-lock.json
generated
@@ -37,6 +37,7 @@
|
|||||||
"base-64": "^1.0.0",
|
"base-64": "^1.0.0",
|
||||||
"class-variance-authority": "^0.7.1",
|
"class-variance-authority": "^0.7.1",
|
||||||
"clsx": "^2.1.1",
|
"clsx": "^2.1.1",
|
||||||
|
"js-tiktoken": "^1.0.21",
|
||||||
"jsdom": "^26.0.0",
|
"jsdom": "^26.0.0",
|
||||||
"lucide-react": "^0.483.0",
|
"lucide-react": "^0.483.0",
|
||||||
"next": "^16.0.7",
|
"next": "^16.0.7",
|
||||||
@@ -6290,6 +6291,26 @@
|
|||||||
"integrity": "sha512-kwDPIFCGx0NZHog36dj+tHiwP4QMzsZ3AgMViUBKI0+V5n4U0ufTCUMhnQ04diaRI8EX/QcPfql7zlhZ7j4zgg==",
|
"integrity": "sha512-kwDPIFCGx0NZHog36dj+tHiwP4QMzsZ3AgMViUBKI0+V5n4U0ufTCUMhnQ04diaRI8EX/QcPfql7zlhZ7j4zgg==",
|
||||||
"license": "MIT"
|
"license": "MIT"
|
||||||
},
|
},
|
||||||
|
"node_modules/base64-js": {
|
||||||
|
"version": "1.5.1",
|
||||||
|
"resolved": "https://registry.npmjs.org/base64-js/-/base64-js-1.5.1.tgz",
|
||||||
|
"integrity": "sha512-AKpaYlHn8t4SVbOHCy+b5+KKgvR4vrsD8vbvrbiQJps7fKDTkjkDry6ji0rUJjC0kzbNePLwzxq8iypo41qeWA==",
|
||||||
|
"funding": [
|
||||||
|
{
|
||||||
|
"type": "github",
|
||||||
|
"url": "https://github.com/sponsors/feross"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "patreon",
|
||||||
|
"url": "https://www.patreon.com/feross"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "consulting",
|
||||||
|
"url": "https://feross.org/support"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"license": "MIT"
|
||||||
|
},
|
||||||
"node_modules/baseline-browser-mapping": {
|
"node_modules/baseline-browser-mapping": {
|
||||||
"version": "2.8.31",
|
"version": "2.8.31",
|
||||||
"resolved": "https://registry.npmjs.org/baseline-browser-mapping/-/baseline-browser-mapping-2.8.31.tgz",
|
"resolved": "https://registry.npmjs.org/baseline-browser-mapping/-/baseline-browser-mapping-2.8.31.tgz",
|
||||||
@@ -8851,6 +8872,15 @@
|
|||||||
"jiti": "lib/jiti-cli.mjs"
|
"jiti": "lib/jiti-cli.mjs"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"node_modules/js-tiktoken": {
|
||||||
|
"version": "1.0.21",
|
||||||
|
"resolved": "https://registry.npmjs.org/js-tiktoken/-/js-tiktoken-1.0.21.tgz",
|
||||||
|
"integrity": "sha512-biOj/6M5qdgx5TKjDnFT1ymSpM5tbd3ylwDtrQvFQSu0Z7bBYko2dF+W/aUkXUPuk6IVpRxk/3Q2sHOzGlS36g==",
|
||||||
|
"license": "MIT",
|
||||||
|
"dependencies": {
|
||||||
|
"base64-js": "^1.5.1"
|
||||||
|
}
|
||||||
|
},
|
||||||
"node_modules/js-tokens": {
|
"node_modules/js-tokens": {
|
||||||
"version": "4.0.0",
|
"version": "4.0.0",
|
||||||
"resolved": "https://registry.npmjs.org/js-tokens/-/js-tokens-4.0.0.tgz",
|
"resolved": "https://registry.npmjs.org/js-tokens/-/js-tokens-4.0.0.tgz",
|
||||||
|
|||||||
@@ -41,6 +41,7 @@
|
|||||||
"base-64": "^1.0.0",
|
"base-64": "^1.0.0",
|
||||||
"class-variance-authority": "^0.7.1",
|
"class-variance-authority": "^0.7.1",
|
||||||
"clsx": "^2.1.1",
|
"clsx": "^2.1.1",
|
||||||
|
"js-tiktoken": "^1.0.21",
|
||||||
"jsdom": "^26.0.0",
|
"jsdom": "^26.0.0",
|
||||||
"lucide-react": "^0.483.0",
|
"lucide-react": "^0.483.0",
|
||||||
"next": "^16.0.7",
|
"next": "^16.0.7",
|
||||||
|
|||||||
Reference in New Issue
Block a user