2025-12-07 20:33:43 +09:00
|
|
|
/**
|
2025-12-08 18:56:34 +09:00
|
|
|
* Token counting utilities using js-tiktoken
|
2025-12-07 20:33:43 +09:00
|
|
|
*
|
2025-12-08 18:56:34 +09:00
|
|
|
* Uses the encoding js-tiktoken selects for "gpt-4o" (o200k_base) as an approximation of Claude's tokenization.
|
|
|
|
|
* This is a pure JavaScript implementation, no WASM required.
|
2025-12-07 20:33:43 +09:00
|
|
|
*/
|
|
|
|
|
|
2025-12-08 18:56:34 +09:00
|
|
|
import { encodingForModel } from "js-tiktoken"
|
2025-12-07 20:33:43 +09:00
|
|
|
import { DEFAULT_SYSTEM_PROMPT, EXTENDED_SYSTEM_PROMPT } from "./system-prompts"
|
|
|
|
|
|
2025-12-08 18:56:34 +09:00
|
|
|
// Tokenizer instance built once when this module is loaded and reused by every call below.
// NOTE(review): "gpt-4o" is presumed to select js-tiktoken's o200k_base encoding — confirm against the library's model map.
const encoder = encodingForModel("gpt-4o")
|
|
|
|
|
|
2025-12-07 20:33:43 +09:00
|
|
|
/**
 * Count the number of tokens in a text string.
 *
 * Special-token markers that appear literally in the input (for example
 * "<|endoftext|>") are counted as ordinary text rather than rejected, so
 * arbitrary user- or model-produced text can be measured without throwing.
 *
 * @param text - The text to count tokens for
 * @returns The number of tokens; 0 for an empty string
 */
export function countTextTokens(text: string): number {
    // Nothing to encode; skip the tokenizer entirely.
    if (text.length === 0) {
        return 0
    }

    // Empty allow-list + empty deny-list: no marker is encoded as a special
    // token and no marker triggers the "special token not allowed" error.
    return encoder.encode(text, [], []).length
}
|
|
|
|
|
|
|
|
|
|
/**
 * Measure the token size of both system prompts.
 *
 * Intended as a debugging aid when tuning prompt length.
 *
 * @returns `default` and `extended` token counts, plus `additions`, the
 * extended count minus the default count
 */
export function getSystemPromptTokenCounts(): {
    default: number
    extended: number
    additions: number
} {
    const baseCount = countTextTokens(DEFAULT_SYSTEM_PROMPT)
    const fullCount = countTextTokens(EXTENDED_SYSTEM_PROMPT)

    // How many tokens the extended prompt has beyond the default one.
    const delta = fullCount - baseCount

    return { default: baseCount, extended: fullCount, additions: delta }
}
|