refactor: simplify Langfuse integration with AI SDK 6 (#375)

- Remove manual token attribute setting (AI SDK 6 telemetry auto-reports)
- Use totalTokens directly instead of inputTokens + outputTokens calculation
- Fix sessionId bug in log-save/log-feedback (prevents wrong trace attachment)
- Obfuscate IP addresses (truncated base64url encoding of the raw IP, not a cryptographic hash) instead of storing raw IPs
- Fix isLangfuseEnabled() to check both keys for consistency
This commit is contained in:
Dayuan Jiang
2025-12-23 16:26:45 +09:00
committed by GitHub
parent 9aec7eda79
commit 5ec05eb100
5 changed files with 42 additions and 60 deletions

View File

@@ -162,9 +162,13 @@ async function handleChatRequest(req: Request): Promise<Response> {
const { messages, xml, previousXml, sessionId } = await req.json() const { messages, xml, previousXml, sessionId } = await req.json()
// Get user IP for Langfuse tracking // Get user IP for Langfuse tracking (hashed for privacy)
const forwardedFor = req.headers.get("x-forwarded-for") const forwardedFor = req.headers.get("x-forwarded-for")
const userId = forwardedFor?.split(",")[0]?.trim() || "anonymous" const rawIp = forwardedFor?.split(",")[0]?.trim() || "anonymous"
const userId =
rawIp === "anonymous"
? rawIp
: `user-${Buffer.from(rawIp).toString("base64url").slice(0, 8)}`
// Validate sessionId for Langfuse (must be string, max 200 chars) // Validate sessionId for Langfuse (must be string, max 200 chars)
const validSessionId = const validSessionId =
@@ -506,12 +510,9 @@ ${userInputText}
userId, userId,
}), }),
}), }),
onFinish: ({ text, usage }) => { onFinish: ({ text }) => {
// Pass usage to Langfuse (Bedrock streaming doesn't auto-report tokens to telemetry) // AI SDK 6 telemetry auto-reports token usage on its spans
setTraceOutput(text, { setTraceOutput(text)
promptTokens: usage?.inputTokens,
completionTokens: usage?.outputTokens,
})
}, },
tools: { tools: {
// Client-side tool that will be executed on the client // Client-side tool that will be executed on the client
@@ -681,20 +682,9 @@ Call this tool to get shape names and usage syntax for a specific library.`,
messageMetadata: ({ part }) => { messageMetadata: ({ part }) => {
if (part.type === "finish") { if (part.type === "finish") {
const usage = (part as any).totalUsage const usage = (part as any).totalUsage
if (!usage) { // AI SDK 6 provides totalTokens directly
console.warn(
"[messageMetadata] No usage data in finish part",
)
return undefined
}
// Total input = non-cached + cached (these are separate counts)
// Note: cacheWriteInputTokens is not available on finish part
const totalInputTokens =
(usage.inputTokens ?? 0) +
(usage.inputTokenDetails?.cacheReadTokens ?? 0)
return { return {
inputTokens: totalInputTokens, totalTokens: usage?.totalTokens ?? 0,
outputTokens: usage.outputTokens ?? 0,
finishReason: (part as any).finishReason, finishReason: (part as any).finishReason,
} }
} }

View File

@@ -27,9 +27,18 @@ export async function POST(req: Request) {
const { messageId, feedback, sessionId } = data const { messageId, feedback, sessionId } = data
// Get user IP for tracking // Skip logging if no sessionId - prevents attaching to wrong user's trace
if (!sessionId) {
return Response.json({ success: true, logged: false })
}
// Get user IP for tracking (hashed for privacy)
const forwardedFor = req.headers.get("x-forwarded-for") const forwardedFor = req.headers.get("x-forwarded-for")
const userId = forwardedFor?.split(",")[0]?.trim() || "anonymous" const rawIp = forwardedFor?.split(",")[0]?.trim() || "anonymous"
const userId =
rawIp === "anonymous"
? rawIp
: `user-${Buffer.from(rawIp).toString("base64url").slice(0, 8)}`
try { try {
// Find the most recent chat trace for this session to attach the score to // Find the most recent chat trace for this session to attach the score to

View File

@@ -27,6 +27,11 @@ export async function POST(req: Request) {
const { filename, format, sessionId } = data const { filename, format, sessionId } = data
// Skip logging if no sessionId - prevents attaching to wrong user's trace
if (!sessionId) {
return Response.json({ success: true, logged: false })
}
try { try {
const timestamp = new Date().toISOString() const timestamp = new Date().toISOString()

View File

@@ -632,21 +632,15 @@ Continue from EXACTLY where you stopped.`,
// DEBUG: Log finish reason to diagnose truncation // DEBUG: Log finish reason to diagnose truncation
console.log("[onFinish] finishReason:", metadata?.finishReason) console.log("[onFinish] finishReason:", metadata?.finishReason)
console.log("[onFinish] metadata:", metadata)
if (metadata) { // AI SDK 6 provides totalTokens directly
// Use Number.isFinite to guard against NaN (typeof NaN === 'number' is true) const totalTokens =
const inputTokens = Number.isFinite(metadata.inputTokens) metadata && Number.isFinite(metadata.totalTokens)
? (metadata.inputTokens as number) ? (metadata.totalTokens as number)
: 0 : 0
const outputTokens = Number.isFinite(metadata.outputTokens) if (totalTokens > 0) {
? (metadata.outputTokens as number) quotaManager.incrementTokenCount(totalTokens)
: 0 quotaManager.incrementTPMCount(totalTokens)
const actualTokens = inputTokens + outputTokens
if (actualTokens > 0) {
quotaManager.incrementTokenCount(actualTokens)
quotaManager.incrementTPMCount(actualTokens)
}
} }
}, },
sendAutomaticallyWhen: ({ messages }) => { sendAutomaticallyWhen: ({ messages }) => {

View File

@@ -21,9 +21,11 @@ export function getLangfuseClient(): LangfuseClient | null {
return langfuseClient return langfuseClient
} }
// Check if Langfuse is configured // Check if Langfuse is configured (both keys required)
export function isLangfuseEnabled(): boolean { export function isLangfuseEnabled(): boolean {
return !!process.env.LANGFUSE_PUBLIC_KEY return !!(
process.env.LANGFUSE_PUBLIC_KEY && process.env.LANGFUSE_SECRET_KEY
)
} }
// Update trace with input data at the start of request // Update trace with input data at the start of request
@@ -43,34 +45,16 @@ export function setTraceInput(params: {
} }
// Update trace with output and end the span // Update trace with output and end the span
export function setTraceOutput( // Note: AI SDK 6 telemetry automatically reports token usage on its spans,
output: string, // so we only need to set the output text and close our wrapper span
usage?: { promptTokens?: number; completionTokens?: number }, export function setTraceOutput(output: string) {
) {
if (!isLangfuseEnabled()) return if (!isLangfuseEnabled()) return
updateActiveTrace({ output }) updateActiveTrace({ output })
// End the observe() wrapper span (AI SDK creates its own child spans with usage)
const activeSpan = api.trace.getActiveSpan() const activeSpan = api.trace.getActiveSpan()
if (activeSpan) { if (activeSpan) {
// Manually set usage attributes since AI SDK Bedrock streaming doesn't provide them
if (usage?.promptTokens) {
activeSpan.setAttribute("ai.usage.promptTokens", usage.promptTokens)
activeSpan.setAttribute(
"gen_ai.usage.input_tokens",
usage.promptTokens,
)
}
if (usage?.completionTokens) {
activeSpan.setAttribute(
"ai.usage.completionTokens",
usage.completionTokens,
)
activeSpan.setAttribute(
"gen_ai.usage.output_tokens",
usage.completionTokens,
)
}
activeSpan.end() activeSpan.end()
} }
} }