fix: use totalUsage with all token types for accurate quota tracking (#381)

The onFinish callback's 'usage' contains only the final step's tokens,
so it underreports usage for multi-step tool calls (like diagram generation).
This change switches to 'totalUsage', which provides cumulative counts across all steps.

Include all 4 token types for accurate counting:
1. inputTokens - non-cached input tokens
2. outputTokens - generated output tokens
3. cachedInputTokens - tokens read from prompt cache
4. inputTokenDetails.cacheWriteTokens - tokens written to cache

Tested locally:
- Request 1 (cache write): 334 + 62 + 0 + 6671 = 7,067 tokens
- Request 2 (cache read): 334 + 184 + 6551 + 120 = 7,189 tokens
- DynamoDB total: 14,256 ✓
This commit is contained in:
Dayuan Jiang
2025-12-23 20:19:28 +09:00
committed by GitHub
parent 7de192e1fa
commit c6b0e5ac62

View File

@@ -542,19 +542,24 @@ ${userInputText}
userId, userId,
}), }),
}), }),
onFinish: ({ text, usage }) => { onFinish: ({ text, totalUsage }) => {
// AI SDK 6 telemetry auto-reports token usage on its spans // AI SDK 6 telemetry auto-reports token usage on its spans
setTraceOutput(text) setTraceOutput(text)
// Record token usage for server-side quota tracking (if enabled) // Record token usage for server-side quota tracking (if enabled)
// Use totalUsage (cumulative across all steps) instead of usage (final step only)
// Include all 4 token types: input, output, cache read, cache write
if ( if (
isQuotaEnabled() && isQuotaEnabled() &&
!hasOwnApiKey && !hasOwnApiKey &&
userId !== "anonymous" && userId !== "anonymous" &&
usage totalUsage
) { ) {
const totalTokens = const totalTokens =
(usage.inputTokens || 0) + (usage.outputTokens || 0) (totalUsage.inputTokens || 0) +
(totalUsage.outputTokens || 0) +
(totalUsage.cachedInputTokens || 0) +
(totalUsage.inputTokenDetails?.cacheWriteTokens || 0)
recordTokenUsage(userId, totalTokens) recordTokenUsage(userId, totalTokens)
} }
}, },