From c6b0e5ac622e3cc3b4db668ec22941c628d47d6f Mon Sep 17 00:00:00 2001 From: Dayuan Jiang <34411969+DayuanJiang@users.noreply.github.com> Date: Tue, 23 Dec 2025 20:19:28 +0900 Subject: [PATCH] fix: use totalUsage with all token types for accurate quota tracking (#381) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The onFinish callback's 'usage' contains only the final step's tokens, which underreports usage for multi-step tool calls (like diagram generation). Changed to 'totalUsage', which provides cumulative counts across all steps. Include all 4 token types for accurate counting: 1. inputTokens - non-cached input tokens 2. outputTokens - generated output tokens 3. cachedInputTokens - tokens read from prompt cache 4. inputTokenDetails.cacheWriteTokens - tokens written to cache Tested locally (input + output + cache read + cache write): - Request 1 (cache write): 334 + 62 + 0 + 6671 = 7,067 tokens - Request 2 (cache read): 334 + 184 + 6551 + 120 = 7,189 tokens - DynamoDB total: 14,256 ✓ --- app/api/chat/route.ts | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/app/api/chat/route.ts b/app/api/chat/route.ts index 8db9eb1..68d6d48 100644 --- a/app/api/chat/route.ts +++ b/app/api/chat/route.ts @@ -542,19 +542,24 @@ ${userInputText} userId, }), }), - onFinish: ({ text, usage }) => { + onFinish: ({ text, totalUsage }) => { // AI SDK 6 telemetry auto-reports token usage on its spans setTraceOutput(text) // Record token usage for server-side quota tracking (if enabled) + // Use totalUsage (cumulative across all steps) instead of usage (final step only) + // Include all 4 token types: input, output, cache read, cache write if ( isQuotaEnabled() && !hasOwnApiKey && userId !== "anonymous" && - usage + totalUsage ) { const totalTokens = - (usage.inputTokens || 0) + (usage.outputTokens || 0) + (totalUsage.inputTokens || 0) + + (totalUsage.outputTokens || 0) + + (totalUsage.cachedInputTokens || 0) + + (totalUsage.inputTokenDetails?.cacheWriteTokens || 
0) recordTokenUsage(userId, totalTokens) } },