Mirror of https://github.com/DayuanJiang/next-ai-draw-io.git (synced 2026-01-02 22:32:27 +08:00)
feat: add Bedrock prompt caching for system and conversation messages (#32)
* feat: add Bedrock prompt caching for system and conversation messages

  - Add cache point to system message (2558+ tokens cached)
  - Add cache point to last assistant message in conversation history
  - This caches the entire conversation prefix for subsequent requests
  - Reduces latency and costs for multi-turn conversations

* refactor: remove duplicated system prompt
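For context, the pattern in the diff below follows the AI SDK's Bedrock cache-point convention: a message that carries providerOptions.bedrock.cachePoint marks everything up to and including itself as a cacheable prefix. The following is a minimal, self-contained sketch of that pattern, assuming AI SDK v5 ('ai') with the '@ai-sdk/amazon-bedrock' provider; the model id, prompt text, and stream handling are illustrative placeholders, not values from this repository.

    import { bedrock } from '@ai-sdk/amazon-bedrock';
    import { streamText } from 'ai';

    // Bedrock only caches prefixes above a minimum size (1024+ tokens on
    // Anthropic Claude models), so a short string like this one would not
    // actually be cached; a real system prompt must be much longer.
    const systemPrompt = 'You are a diagramming assistant...';

    const result = streamText({
        model: bedrock('anthropic.claude-3-5-sonnet-20241022-v2:0'),
        messages: [
            {
                role: 'system',
                content: systemPrompt,
                // Cache point: Bedrock caches the conversation prefix that
                // ends here and reuses it on later requests sharing the prefix.
                providerOptions: {
                    bedrock: { cachePoint: { type: 'default' } },
                },
            },
            { role: 'user', content: 'Draw a simple login flow.' },
        ],
    });

    // Drain the stream; the route handler in this repository would instead
    // return it as a streaming response.
    for await (const chunk of result.textStream) process.stdout.write(chunk);

The first request containing the prefix writes it to the cache; subsequent requests that share the prefix read it back at a discounted per-token rate, which is where the latency and cost savings in the commit message come from.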
@@ -140,17 +140,49 @@ ${lastMessageText}
         }
     }
 
     console.log("Enhanced messages:", enhancedMessages);
 
+    // Add cache point to the last assistant message in conversation history
+    // This caches the entire conversation prefix for subsequent requests
+    // Strategy: system (cached) + history with last assistant (cached) + new user message
+    if (enhancedMessages.length >= 2) {
+        // Find the last assistant message (should be second-to-last, before current user message)
+        for (let i = enhancedMessages.length - 2; i >= 0; i--) {
+            if (enhancedMessages[i].role === 'assistant') {
+                enhancedMessages[i] = {
+                    ...enhancedMessages[i],
+                    providerOptions: {
+                        bedrock: { cachePoint: { type: 'default' } },
+                    },
+                };
+                break; // Only cache the last assistant message
+            }
+        }
+    }
+
     // Get AI model from environment configuration
     const { model, providerOptions, headers } = getAIModel();
 
+    // System message with cache point for Bedrock (requires 1024+ tokens)
+    const systemMessageWithCache = {
+        role: 'system' as const,
+        content: systemMessage,
+        providerOptions: {
+            bedrock: { cachePoint: { type: 'default' } },
+        },
+    };
+
     const result = streamText({
         model,
-        system: systemMessage,
-        messages: enhancedMessages,
+        messages: [systemMessageWithCache, ...enhancedMessages],
         ...(providerOptions && { providerOptions }),
         ...(headers && { headers }),
+        onFinish: ({ usage, providerMetadata }) => {
+            console.log('[Cache] Usage:', JSON.stringify({
+                inputTokens: usage?.inputTokens,
+                outputTokens: usage?.outputTokens,
+                cachedInputTokens: usage?.cachedInputTokens,
+            }, null, 2));
+            console.log('[Cache] Provider metadata:', JSON.stringify(providerMetadata, null, 2));
+        },
         tools: {
             // Client-side tool that will be executed on the client
             display_diagram: {
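The onFinish hook above is how the change can be verified at runtime: on the first request, cachedInputTokens should be 0 or undefined while the prefix is written to the cache, and on later turns of the same conversation it should be non-zero. A small helper like the following (hypothetical, not part of this commit) turns the logged fields into a rough hit ratio; providers differ on whether cached tokens are counted inside inputTokens, so treat the number as an approximation.

    // Hypothetical helper: approximate fraction of input tokens served from
    // the prompt cache, based on the usage object logged in onFinish above.
    function cacheHitRatio(usage: {
        inputTokens?: number;
        cachedInputTokens?: number;
    }): number {
        const input = usage.inputTokens ?? 0;
        const cached = usage.cachedInputTokens ?? 0;
        return input > 0 ? cached / input : 0;
    }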