feat: support minimax model (#185)

* feat: support minimax model with XML wrapping fix
  - Add wrapWithMxFile utility to properly wrap XML for draw.io
  - Fix 'Not a diagram file' error when model generates raw <root> XML
  - Add supportsPromptCaching check for conditional caching
  - Only enable Bedrock prompt caching for Claude models
* docs: update model mention to minimax-m2 across About pages and READMEs
  - Update tooltip in chat-panel.tsx to mention minimax-m2 model change
  - Update English, Chinese, and Japanese About pages with model change info
  - Update English, Chinese, and Japanese READMEs with demo site model note
---------
Co-authored-by: dayuan.jiang <jiangdy@amazon.co.jp>
2026-01-02 22:32:27 +08:00 · 2025-12-09 15:53:59 +09:00
parent 914e914423
commit 967d63c57e
10 changed files with 98 additions and 18 deletions
--- a/app/about/cn/page.tsx
+++ b/app/about/cn/page.tsx
@@ -102,7 +102,7 @@ export default function AboutCN() {
                            {/* Header */}
                            <div className="mb-4">
                                <h3 className="text-lg font-bold text-gray-900 tracking-tight">
-                                    关于扩容与限制{" "}
+                                    模型变更与用量限制{" "}
                                    <span className="text-sm text-amber-600 font-medium italic font-normal">
                                        (或者说：我的钱包顶不住了)
                                    </span>
@@ -116,13 +116,20 @@ export default function AboutCN() {
                                    AI 接口的频率限制
                                    (TPS/TPM)。一旦超限，系统就会暂停，导致请求失败。
                                </p>
+                                <p>
+                                    由于使用量过高，我已将模型从 Claude 更换为{" "}
+                                    <span className="font-semibold text-amber-700">
+                                        minimax-m2
+                                    </span>
+                                    ，以降低成本。
+                                </p>
                                <p>
                                    作为一个
                                    <span className="font-semibold text-amber-700">
                                        独立开发者
                                    </span>
                                    ，目前的 API
-                                    费用全是我自己在掏腰包（纯属为爱发电）。为了保证服务能细水长流，同时也为了避免我个人陷入财务危机，我不得不设置以下临时用量限制：
+                                    费用全是我自己在掏腰包（纯属为爱发电）。为了保证服务能细水长流，同时也为了避免我个人陷入财务危机，我还设置了以下临时用量限制：
                                </p>
                            </div>

--- a/app/about/ja/page.tsx
+++ b/app/about/ja/page.tsx
@@ -110,7 +110,7 @@ export default function AboutJA() {
                            {/* Header */}
                            <div className="mb-4">
                                <h3 className="text-lg font-bold text-gray-900 tracking-tight">
-                                    利用制限とスケーリングについて{" "}
+                                    モデル変更と利用制限について{" "}
                                    <span className="text-sm text-amber-600 font-medium italic font-normal">
                                        （別名：お財布が悲鳴を上げています）
                                    </span>
@@ -124,13 +124,21 @@ export default function AboutJA() {
                                    AI API のレート制限 (TPS/TPM)
                                    に頻繁に引っかかってしまっています。制限に達するとシステムが一時停止し、エラーが発生してしまいます。
                                </p>
+                                <p>
+                                    利用量の増加に伴い、コスト削減のためモデルを
+                                    Claude から{" "}
+                                    <span className="font-semibold text-amber-700">
+                                        minimax-m2
+                                    </span>{" "}
+                                    に変更しました。
+                                </p>
                                <p>
                                    私は現在、
                                    <span className="font-semibold text-amber-700">
                                        個人開発者
                                    </span>
                                    として API
-                                    費用を全額自腹で負担しています。サービスを継続し、かつ私自身が借金を背負わないようにするため（笑）、一時的に以下の利用制限を設けさせていただきました。
+                                    費用を全額自腹で負担しています。サービスを継続し、かつ私自身が借金を背負わないようにするため（笑）、一時的に以下の利用制限も設けさせていただきました。
                                </p>
                            </div>

--- a/app/about/page.tsx
+++ b/app/about/page.tsx
@@ -110,7 +110,7 @@ export default function About() {
                            {/* Header */}
                            <div className="mb-4">
                                <h3 className="text-lg font-bold text-gray-900 tracking-tight">
-                                    Usage Limits & Scaling{" "}
+                                    Model Change & Usage Limits{" "}
                                    <span className="text-sm text-amber-600 font-medium italic font-normal">
                                        (Or: Why My Wallet is Crying)
                                    </span>
@@ -127,6 +127,14 @@ export default function About() {
                                    (TPS/TPM). When this happens, the system
                                    pauses, leading to failed requests.
                                </p>
+                                <p>
+                                    Due to the high usage, I have changed the
+                                    model from Claude to{" "}
+                                    <span className="font-semibold text-amber-700">
+                                        minimax-m2
+                                    </span>
+                                    , which is more cost-effective.
+                                </p>
                                <p>
                                    As an{" "}
                                    <span className="font-semibold text-amber-700">
@@ -135,7 +143,7 @@ export default function About() {
                                    , I am currently footing the entire API
                                    bill. To keep the lights on and ensure the
                                    service remains available to everyone
-                                    without sending me into debt, I have
+                                    without sending me into debt, I have also
                                    implemented the following temporary caps:
                                </p>
                            </div>
--- a/app/api/chat/route.ts
+++ b/app/api/chat/route.ts
@@ -8,7 +8,7 @@ import {
    streamText,
 } from "ai"
 import { z } from "zod"
-import { getAIModel } from "@/lib/ai-providers"
+import { getAIModel, supportsPromptCaching } from "@/lib/ai-providers"
 import { findCachedResponse } from "@/lib/cached-responses"
 import {
    getTelemetryConfig,
@@ -202,6 +202,12 @@ async function handleChatRequest(req: Request): Promise<Response> {
    // Get AI model from environment configuration
    const { model, providerOptions, headers, modelId } = getAIModel()

+    // Check if model supports prompt caching
+    const shouldCache = supportsPromptCaching(modelId)
+    console.log(
+        `[Prompt Caching] ${shouldCache ? "ENABLED" : "DISABLED"} for model: ${modelId}`,
+    )
+
    // Get the appropriate system prompt based on model (extended for Opus/Haiku 4.5)
    const systemMessage = getSystemPrompt(modelId)

@@ -262,7 +268,7 @@ ${lastMessageText}
    // Add cache point to the last assistant message in conversation history
    // This caches the entire conversation prefix for subsequent requests
    // Strategy: system (cached) + history with last assistant (cached) + new user message
-    if (enhancedMessages.length >= 2) {
+    if (shouldCache && enhancedMessages.length >= 2) {
        // Find the last assistant message (should be second-to-last, before current user message)
        for (let i = enhancedMessages.length - 2; i >= 0; i--) {
            if (enhancedMessages[i].role === "assistant") {
@@ -287,17 +293,21 @@ ${lastMessageText}
        {
            role: "system" as const,
            content: systemMessage,
-            providerOptions: {
-                bedrock: { cachePoint: { type: "default" } },
-            },
+            ...(shouldCache && {
+                providerOptions: {
+                    bedrock: { cachePoint: { type: "default" } },
+                },
+            }),
        },
        // Cache breakpoint 2: Current diagram XML context
        {
            role: "system" as const,
            content: `Current diagram XML:\n"""xml\n${xml || ""}\n"""\nWhen using edit_diagram, COPY search patterns exactly from this XML - attribute order matters!`,
-            providerOptions: {
-                bedrock: { cachePoint: { type: "default" } },
-            },
+            ...(shouldCache && {
+                providerOptions: {
+                    bedrock: { cachePoint: { type: "default" } },
+                },
+            }),
        },
    ]