Mirror of https://github.com/DayuanJiang/next-ai-draw-io.git (synced 2026-01-02 22:32:27 +08:00)
feat: add Bedrock prompt caching for system and conversation messages (#32)
* feat: add Bedrock prompt caching for system and conversation messages

  - Add cache point to system message (2558+ tokens cached)
  - Add cache point to last assistant message in conversation history
  - This caches the entire conversation prefix for subsequent requests
  - Reduces latency and costs for multi-turn conversations

* refactor: remove duplicated system prompt
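For context, the pattern in the diff below follows the AI SDK's Bedrock cache-point convention: a message that carries providerOptions.bedrock.cachePoint marks everything up to and including itself as a cacheable prefix. The following is a minimal, self-contained sketch of that pattern, assuming AI SDK v5 ('ai') with the '@ai-sdk/amazon-bedrock' provider; the model id, prompt text, and stream handling are illustrative placeholders, not values from this repository.

    import { bedrock } from '@ai-sdk/amazon-bedrock';
    import { streamText } from 'ai';

    // Bedrock only caches prefixes above a minimum size (1024+ tokens on
    // Anthropic Claude models), so a short string like this one would not
    // actually be cached; a real system prompt must be much longer.
    const systemPrompt = 'You are a diagramming assistant...';

    const result = streamText({
        model: bedrock('anthropic.claude-3-5-sonnet-20241022-v2:0'),
        messages: [
            {
                role: 'system',
                content: systemPrompt,
                // Cache point: Bedrock caches the conversation prefix that
                // ends here and reuses it on later requests sharing the prefix.
                providerOptions: {
                    bedrock: { cachePoint: { type: 'default' } },
                },
            },
            { role: 'user', content: 'Draw a simple login flow.' },
        ],
    });

    // Drain the stream; the route handler in this repository would instead
    // return it as a streaming response.
    for await (const chunk of result.textStream) process.stdout.write(chunk);

The first request containing the prefix writes it to the cache; subsequent requests that share the prefix read it back at a discounted per-token rate, which is where the latency and cost savings in the commit message come from.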
@@ -140,17 +140,49 @@ ${lastMessageText}
         }
     }
 
     console.log("Enhanced messages:", enhancedMessages);
 
+    // Add cache point to the last assistant message in conversation history
+    // This caches the entire conversation prefix for subsequent requests
+    // Strategy: system (cached) + history with last assistant (cached) + new user message
+    if (enhancedMessages.length >= 2) {
+        // Find the last assistant message (should be second-to-last, before current user message)
+        for (let i = enhancedMessages.length - 2; i >= 0; i--) {
+            if (enhancedMessages[i].role === 'assistant') {
+                enhancedMessages[i] = {
+                    ...enhancedMessages[i],
+                    providerOptions: {
+                        bedrock: { cachePoint: { type: 'default' } },
+                    },
+                };
+                break; // Only cache the last assistant message
+            }
+        }
+    }
+
     // Get AI model from environment configuration
     const { model, providerOptions, headers } = getAIModel();
 
+    // System message with cache point for Bedrock (requires 1024+ tokens)
+    const systemMessageWithCache = {
+        role: 'system' as const,
+        content: systemMessage,
+        providerOptions: {
+            bedrock: { cachePoint: { type: 'default' } },
+        },
+    };
+
     const result = streamText({
         model,
-        system: systemMessage,
-        messages: enhancedMessages,
+        messages: [systemMessageWithCache, ...enhancedMessages],
         ...(providerOptions && { providerOptions }),
         ...(headers && { headers }),
+        onFinish: ({ usage, providerMetadata }) => {
+            console.log('[Cache] Usage:', JSON.stringify({
+                inputTokens: usage?.inputTokens,
+                outputTokens: usage?.outputTokens,
+                cachedInputTokens: usage?.cachedInputTokens,
+            }, null, 2));
+            console.log('[Cache] Provider metadata:', JSON.stringify(providerMetadata, null, 2));
+        },
         tools: {
             // Client-side tool that will be executed on the client
             display_diagram: {
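The onFinish hook above is how the change can be verified at runtime: on the first request, cachedInputTokens should be 0 or undefined while the prefix is written to the cache, and on later turns of the same conversation it should be non-zero. A small helper like the following (hypothetical, not part of this commit) turns the logged fields into a rough hit ratio; providers differ on whether cached tokens are counted inside inputTokens, so treat the number as an approximation.

    // Hypothetical helper: approximate fraction of input tokens served from
    // the prompt cache, based on the usage object logged in onFinish above.
    function cacheHitRatio(usage: {
        inputTokens?: number;
        cachedInputTokens?: number;
    }): number {
        const input = usage.inputTokens ?? 0;
        const cached = usage.cachedInputTokens ?? 0;
        return input > 0 ? cached / input : 0;
    }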