feat: add PDF and text file upload support (#205)

- Add client-side PDF text extraction using unpdf library
- Support text files (.txt, .md, .json, .csv, .py, .js, .ts, etc.)
- Add file preview with character count for PDF/text files
- Add 150k character limit for extracted content
- Highlight Paper to Diagram example with NEW badge
- Fix React hydration error by adding explicit IDs to ResizablePanelGroup
- Remove code duplication by centralizing file utilities in pdf-utils.ts
This commit is contained in:
Dayuan Jiang
2025-12-10 21:32:35 +09:00
committed by GitHub
parent 43e5993f47
commit d2ba133eaf
14 changed files with 940 additions and 57 deletions

View File

@@ -394,6 +394,13 @@ ${lastMessageText}
return null return null
}, },
onFinish: ({ text, usage }) => { onFinish: ({ text, usage }) => {
// Log token usage
if (usage) {
const cachedTokens = (usage as any).cachedInputTokens ?? 0
console.log(
`[Token Usage] input: ${usage.inputTokens ?? 0}, cached: ${cachedTokens}, output: ${usage.outputTokens ?? 0}, total: ${(usage.inputTokens ?? 0) + cachedTokens + (usage.outputTokens ?? 0)}`,
)
}
// Pass usage to Langfuse (Bedrock streaming doesn't auto-report tokens to telemetry) // Pass usage to Langfuse (Bedrock streaming doesn't auto-report tokens to telemetry)
setTraceOutput(text, { setTraceOutput(text, {
promptTokens: usage?.inputTokens, promptTokens: usage?.inputTokens,

View File

@@ -121,12 +121,17 @@ export default function Home() {
return ( return (
<div className="h-screen bg-background relative overflow-hidden"> <div className="h-screen bg-background relative overflow-hidden">
<ResizablePanelGroup <ResizablePanelGroup
id="main-panel-group"
key={isMobile ? "mobile" : "desktop"} key={isMobile ? "mobile" : "desktop"}
direction={isMobile ? "vertical" : "horizontal"} direction={isMobile ? "vertical" : "horizontal"}
className="h-full" className="h-full"
> >
{/* Draw.io Canvas */} {/* Draw.io Canvas */}
<ResizablePanel defaultSize={isMobile ? 50 : 67} minSize={20}> <ResizablePanel
id="drawio-panel"
defaultSize={isMobile ? 50 : 67}
minSize={20}
>
<div <div
className={`h-full relative ${ className={`h-full relative ${
isMobile ? "p-1" : "p-2" isMobile ? "p-1" : "p-2"
@@ -162,6 +167,7 @@ export default function Home() {
{/* Chat Panel */} {/* Chat Panel */}
<ResizablePanel <ResizablePanel
id="chat-panel"
ref={chatPanelRef} ref={chatPanelRef}
defaultSize={isMobile ? 50 : 33} defaultSize={isMobile ? 50 : 33}
minSize={isMobile ? 20 : 15} minSize={isMobile ? 20 : 15}

View File

@@ -1,28 +1,52 @@
"use client" "use client"
import { Cloud, GitBranch, Palette, Zap } from "lucide-react" import { Cloud, FileText, GitBranch, Palette, Zap } from "lucide-react"
interface ExampleCardProps { interface ExampleCardProps {
icon: React.ReactNode icon: React.ReactNode
title: string title: string
description: string description: string
onClick: () => void onClick: () => void
isNew?: boolean
} }
function ExampleCard({ icon, title, description, onClick }: ExampleCardProps) { function ExampleCard({
icon,
title,
description,
onClick,
isNew,
}: ExampleCardProps) {
return ( return (
<button <button
onClick={onClick} onClick={onClick}
className="group w-full text-left p-4 rounded-xl border border-border/60 bg-card hover:bg-accent/50 hover:border-primary/30 transition-all duration-200 hover:shadow-sm" className={`group w-full text-left p-4 rounded-xl border bg-card hover:bg-accent/50 hover:border-primary/30 transition-all duration-200 hover:shadow-sm ${
isNew
? "border-primary/40 ring-1 ring-primary/20"
: "border-border/60"
}`}
> >
<div className="flex items-start gap-3"> <div className="flex items-start gap-3">
<div className="w-9 h-9 rounded-lg bg-primary/10 flex items-center justify-center shrink-0 group-hover:bg-primary/15 transition-colors"> <div
className={`w-9 h-9 rounded-lg flex items-center justify-center shrink-0 transition-colors ${
isNew
? "bg-primary/20 group-hover:bg-primary/25"
: "bg-primary/10 group-hover:bg-primary/15"
}`}
>
{icon} {icon}
</div> </div>
<div className="min-w-0"> <div className="min-w-0">
<h3 className="text-sm font-medium text-foreground group-hover:text-primary transition-colors"> <div className="flex items-center gap-2">
{title} <h3 className="text-sm font-medium text-foreground group-hover:text-primary transition-colors">
</h3> {title}
</h3>
{isNew && (
<span className="px-1.5 py-0.5 text-[10px] font-semibold bg-primary text-primary-foreground rounded">
NEW
</span>
)}
</div>
<p className="text-xs text-muted-foreground mt-0.5 line-clamp-2"> <p className="text-xs text-muted-foreground mt-0.5 line-clamp-2">
{description} {description}
</p> </p>
@@ -67,6 +91,21 @@ export default function ExamplePanel({
} }
} }
/**
 * Handler for the "Paper to Diagram" example card.
 *
 * Pre-fills the chat input with the summarization prompt, then fetches
 * `/chain-of-thought.txt` and stages it as a `text/plain` attachment.
 * Failures are logged to the console and leave the attachment list untouched.
 */
const handlePdfExample = async () => {
    setInput("Summarize this paper as a diagram")

    try {
        const response = await fetch("/chain-of-thought.txt")
        // fetch() only rejects on network failure; an HTTP error (404/500) still
        // resolves, and its error page would otherwise be attached as the paper.
        if (!response.ok) {
            throw new Error(
                `Request for /chain-of-thought.txt failed with status ${response.status}`,
            )
        }
        const blob = await response.blob()
        const file = new File([blob], "chain-of-thought.txt", {
            type: "text/plain",
        })
        setFiles([file])
    } catch (error) {
        console.error("Error loading text file:", error)
    }
}
return ( return (
<div className="py-6 px-2 animate-fade-in"> <div className="py-6 px-2 animate-fade-in">
{/* Welcome section */} {/* Welcome section */}
@@ -87,6 +126,14 @@ export default function ExamplePanel({
</p> </p>
<div className="grid gap-2"> <div className="grid gap-2">
<ExampleCard
icon={<FileText className="w-4 h-4 text-primary" />}
title="Paper to Diagram"
description="Upload .pdf, .txt, .md, .json, .csv, .py, .js, .ts and more"
onClick={handlePdfExample}
isNew
/>
<ExampleCard <ExampleCard
icon={<Zap className="w-4 h-4 text-primary" />} icon={<Zap className="w-4 h-4 text-primary" />}
title="Animated Diagram" title="Animated Diagram"

View File

@@ -19,11 +19,16 @@ import { SaveDialog } from "@/components/save-dialog"
import { Button } from "@/components/ui/button" import { Button } from "@/components/ui/button"
import { Textarea } from "@/components/ui/textarea" import { Textarea } from "@/components/ui/textarea"
import { useDiagram } from "@/contexts/diagram-context" import { useDiagram } from "@/contexts/diagram-context"
import { isPdfFile, isTextFile } from "@/lib/pdf-utils"
import { FilePreviewList } from "./file-preview-list" import { FilePreviewList } from "./file-preview-list"
const MAX_FILE_SIZE = 2 * 1024 * 1024 // 2MB const MAX_IMAGE_SIZE = 2 * 1024 * 1024 // 2MB
const MAX_FILES = 5 const MAX_FILES = 5
/**
 * Reports whether a file may be attached to the chat.
 *
 * Accepts anything with an `image/*` MIME type, plus whatever `isPdfFile`
 * and `isTextFile` recognise.
 */
function isValidFileType(file: File): boolean {
    if (file.type.startsWith("image/")) {
        return true
    }
    return isPdfFile(file) || isTextFile(file)
}
function formatFileSize(bytes: number): string { function formatFileSize(bytes: number): string {
const mb = bytes / 1024 / 1024 const mb = bytes / 1024 / 1024
if (mb < 0.01) return `${(bytes / 1024).toFixed(0)}KB` if (mb < 0.01) return `${(bytes / 1024).toFixed(0)}KB`
@@ -63,9 +68,16 @@ function validateFiles(
errors.push(`Only ${availableSlots} more file(s) allowed`) errors.push(`Only ${availableSlots} more file(s) allowed`)
break break
} }
if (file.size > MAX_FILE_SIZE) { if (!isValidFileType(file)) {
errors.push(`"${file.name}" is not a supported file type`)
continue
}
// Only check size for images (PDFs/text files are extracted client-side, so file size doesn't matter)
const isExtractedFile = isPdfFile(file) || isTextFile(file)
if (!isExtractedFile && file.size > MAX_IMAGE_SIZE) {
const maxSizeMB = MAX_IMAGE_SIZE / 1024 / 1024
errors.push( errors.push(
`"${file.name}" is ${formatFileSize(file.size)} (exceeds 2MB)`, `"${file.name}" is ${formatFileSize(file.size)} (exceeds ${maxSizeMB}MB)`,
) )
} else { } else {
validFiles.push(file) validFiles.push(file)
@@ -109,6 +121,10 @@ interface ChatInputProps {
onClearChat: () => void onClearChat: () => void
files?: File[] files?: File[]
onFileChange?: (files: File[]) => void onFileChange?: (files: File[]) => void
pdfData?: Map<
File,
{ text: string; charCount: number; isExtracting: boolean }
>
showHistory?: boolean showHistory?: boolean
onToggleHistory?: (show: boolean) => void onToggleHistory?: (show: boolean) => void
sessionId?: string sessionId?: string
@@ -123,6 +139,7 @@ export function ChatInput({
onClearChat, onClearChat,
files = [], files = [],
onFileChange = () => {}, onFileChange = () => {},
pdfData = new Map(),
showHistory = false, showHistory = false,
onToggleHistory = () => {}, onToggleHistory = () => {},
sessionId, sessionId,
@@ -245,11 +262,14 @@ export function ChatInput({
if (isDisabled) return if (isDisabled) return
const droppedFiles = e.dataTransfer.files const droppedFiles = e.dataTransfer.files
const imageFiles = Array.from(droppedFiles).filter((file) => const supportedFiles = Array.from(droppedFiles).filter((file) =>
file.type.startsWith("image/"), isValidFileType(file),
) )
const { validFiles, errors } = validateFiles(imageFiles, files.length) const { validFiles, errors } = validateFiles(
supportedFiles,
files.length,
)
showValidationErrors(errors) showValidationErrors(errors)
if (validFiles.length > 0) { if (validFiles.length > 0) {
onFileChange([...files, ...validFiles]) onFileChange([...files, ...validFiles])
@@ -279,6 +299,7 @@ export function ChatInput({
<FilePreviewList <FilePreviewList
files={files} files={files}
onRemoveFile={handleRemoveFile} onRemoveFile={handleRemoveFile}
pdfData={pdfData}
/> />
</div> </div>
)} )}
@@ -291,7 +312,7 @@ export function ChatInput({
onChange={handleChange} onChange={handleChange}
onKeyDown={handleKeyDown} onKeyDown={handleKeyDown}
onPaste={handlePaste} onPaste={handlePaste}
placeholder="Describe your diagram or paste an image..." placeholder="Describe your diagram or upload a file..."
disabled={isDisabled} disabled={isDisabled}
aria-label="Chat input" aria-label="Chat input"
className="min-h-[60px] max-h-[200px] resize-none border-0 bg-transparent px-4 py-3 text-sm focus-visible:ring-0 focus-visible:ring-offset-0 placeholder:text-muted-foreground/60" className="min-h-[60px] max-h-[200px] resize-none border-0 bg-transparent px-4 py-3 text-sm focus-visible:ring-0 focus-visible:ring-offset-0 placeholder:text-muted-foreground/60"
@@ -367,7 +388,7 @@ export function ChatInput({
size="sm" size="sm"
onClick={triggerFileInput} onClick={triggerFileInput}
disabled={isDisabled} disabled={isDisabled}
tooltipContent="Upload image" tooltipContent="Upload file (image, PDF, text)"
className="h-8 w-8 p-0 text-muted-foreground hover:text-foreground" className="h-8 w-8 p-0 text-muted-foreground hover:text-foreground"
> >
<ImageIcon className="h-4 w-4" /> <ImageIcon className="h-4 w-4" />
@@ -378,7 +399,7 @@ export function ChatInput({
ref={fileInputRef} ref={fileInputRef}
className="hidden" className="hidden"
onChange={handleFileChange} onChange={handleFileChange}
accept="image/*" accept="image/*,.pdf,application/pdf,text/*,.md,.markdown,.json,.csv,.xml,.yaml,.yml,.toml"
multiple multiple
disabled={isDisabled} disabled={isDisabled}
/> />

View File

@@ -8,6 +8,8 @@ import {
ChevronUp, ChevronUp,
Copy, Copy,
Cpu, Cpu,
FileCode,
FileText,
Minus, Minus,
Pencil, Pencil,
Plus, Plus,
@@ -89,6 +91,59 @@ function EditDiffDisplay({ edits }: { edits: EditPair[] }) {
import { useDiagram } from "@/contexts/diagram-context" import { useDiagram } from "@/contexts/diagram-context"
// One renderable chunk of a message: either free text or the extracted
// content of an attached file (PDF or text file).
interface TextSection {
    type: "text" | "file"
    content: string
    filename?: string
    charCount?: number
    fileType?: "pdf" | "text"
}

/**
 * Splits message text into an ordered list of plain-text and file sections.
 *
 * A file section starts at a `[PDF: name]` or `[File: name]` header line and
 * runs until the next such header preceded by a blank line, or the end of the
 * text. Text between file sections is trimmed and dropped when empty. If the
 * result would be empty, the untouched input is returned as one text section.
 *
 * @param text - Full text of a message part.
 * @returns Sections in the order they appear in `text`.
 */
function splitTextIntoFileSections(text: string): TextSection[] {
    const fileBlockRegex =
        /\[(PDF|File):\s*([^\]]+)\]\n([\s\S]*?)(?=\n\n\[(PDF|File):|$)/g
    const result: TextSection[] = []

    // Appends a plain-text section unless it is empty after trimming
    const pushPlainText = (raw: string): void => {
        const trimmed = raw.trim()
        if (trimmed) {
            result.push({ type: "text", content: trimmed })
        }
    }

    // Offset just past the previously consumed file section
    let cursor = 0
    for (const found of text.matchAll(fileBlockRegex)) {
        const start = found.index ?? 0
        const [whole, label, rawName, rawBody] = found

        pushPlainText(text.slice(cursor, start))

        const fileType = label.toLowerCase() === "pdf" ? "pdf" : "text"
        const body = rawBody.trim()
        result.push({
            type: "file",
            content: body,
            filename: rawName.trim(),
            charCount: body.length,
            fileType,
        })
        cursor = start + whole.length
    }

    // Whatever follows the last file section
    pushPlainText(text.slice(cursor))

    // Nothing recognised (or only whitespace): hand back the input as-is
    if (result.length === 0) {
        result.push({ type: "text", content: text })
    }

    return result
}
const getMessageTextContent = (message: UIMessage): string => { const getMessageTextContent = (message: UIMessage): string => {
if (!message.parts) return "" if (!message.parts) return ""
return message.parts return message.parts
@@ -97,6 +152,14 @@ const getMessageTextContent = (message: UIMessage): string => {
.join("\n") .join("\n")
} }
// Returns what the user typed, without any file content appended to the message.
// Everything from the first blank-line-separated `[PDF: ...]` / `[File: ...]`
// header to the end of the text is discarded, and the remainder is trimmed.
const getUserOriginalText = (message: UIMessage): string => {
    const appendedFilesRegex = /\n\n\[(PDF|File):\s*[^\]]+\]\n[\s\S]*$/
    const combined = getMessageTextContent(message)
    // The pattern always matches through to the end of the string, so cutting
    // at the match start is the same as replacing the match with "".
    const cutAt = combined.search(appendedFilesRegex)
    const typed = cutAt === -1 ? combined : combined.slice(0, cutAt)
    return typed.trim()
}
interface ChatMessageDisplayProps { interface ChatMessageDisplayProps {
messages: UIMessage[] messages: UIMessage[]
setInput: (input: string) => void setInput: (input: string) => void
@@ -131,6 +194,10 @@ export function ChatMessageDisplay({
) )
const editTextareaRef = useRef<HTMLTextAreaElement>(null) const editTextareaRef = useRef<HTMLTextAreaElement>(null)
const [editText, setEditText] = useState<string>("") const [editText, setEditText] = useState<string>("")
// Track which file sections (PDF or text) are expanded (key: messageId-file-partIndex-sectionIndex)
const [expandedPdfSections, setExpandedPdfSections] = useState<
Record<string, boolean>
>({})
const copyMessageToClipboard = async (messageId: string, text: string) => { const copyMessageToClipboard = async (messageId: string, text: string) => {
try { try {
@@ -391,7 +458,9 @@ export function ChatMessageDisplay({
message.id, message.id,
) )
setEditText( setEditText(
userMessageText, getUserOriginalText(
message,
),
) )
}} }}
className="p-1.5 rounded-lg text-muted-foreground/60 hover:text-muted-foreground hover:bg-muted transition-colors" className="p-1.5 rounded-lg text-muted-foreground/60 hover:text-muted-foreground hover:bg-muted transition-colors"
@@ -607,7 +676,9 @@ export function ChatMessageDisplay({
message.id, message.id,
) )
setEditText( setEditText(
userMessageText, getUserOriginalText(
message,
),
) )
} }
}} }}
@@ -627,7 +698,9 @@ export function ChatMessageDisplay({
message.id, message.id,
) )
setEditText( setEditText(
userMessageText, getUserOriginalText(
message,
),
) )
} }
}} }}
@@ -649,26 +722,126 @@ export function ChatMessageDisplay({
part.type === part.type ===
"text" "text"
) { ) {
const textContent =
(
part as {
text: string
}
)
.text
const sections =
splitTextIntoFileSections(
textContent,
)
return ( return (
<div <div
key={`${message.id}-text-${group.startIndex}-${partIndex}`} key={`${message.id}-text-${group.startIndex}-${partIndex}`}
className={`prose prose-sm max-w-none break-words [&>*:first-child]:mt-0 [&>*:last-child]:mb-0 ${ className="space-y-2"
message.role ===
"user"
? "[&_*]:!text-primary-foreground prose-code:bg-white/20"
: "dark:prose-invert"
}`}
> >
<ReactMarkdown> {sections.map(
{ (
( section,
part as { sectionIndex,
text: string ) => {
} if (
section.type ===
"file"
) {
const pdfKey = `${message.id}-file-${partIndex}-${sectionIndex}`
const isExpanded =
expandedPdfSections[
pdfKey
] ??
false
const charDisplay =
section.charCount &&
section.charCount >=
1000
? `${(section.charCount / 1000).toFixed(1)}k`
: section.charCount
return (
<div
key={
pdfKey
}
className="rounded-lg border border-border/60 bg-muted/30 overflow-hidden"
>
<button
type="button"
onClick={(
e,
) => {
e.stopPropagation()
setExpandedPdfSections(
(
prev,
) => ({
...prev,
[pdfKey]:
!isExpanded,
}),
)
}}
className="w-full flex items-center justify-between px-3 py-2 hover:bg-muted/50 transition-colors"
>
<div className="flex items-center gap-2">
{section.fileType ===
"pdf" ? (
<FileText className="h-4 w-4 text-red-500" />
) : (
<FileCode className="h-4 w-4 text-blue-500" />
)}
<span className="text-xs font-medium">
{
section.filename
}
</span>
<span className="text-[10px] text-muted-foreground">
(
{
charDisplay
}{" "}
chars)
</span>
</div>
{isExpanded ? (
<ChevronUp className="h-4 w-4 text-muted-foreground" />
) : (
<ChevronDown className="h-4 w-4 text-muted-foreground" />
)}
</button>
{isExpanded && (
<div className="px-3 py-2 border-t border-border/40 max-h-48 overflow-y-auto bg-muted/30">
<pre className="text-xs whitespace-pre-wrap text-foreground/80">
{
section.content
}
</pre>
</div>
)}
</div>
)
}
// Regular text section
return (
<div
key={`${message.id}-textsection-${partIndex}-${sectionIndex}`}
className={`prose prose-sm max-w-none break-words [&>*:first-child]:mt-0 [&>*:last-child]:mb-0 ${
message.role ===
"user"
? "[&_*]:!text-primary-foreground prose-code:bg-white/20"
: "dark:prose-invert"
}`}
>
<ReactMarkdown>
{
section.content
}
</ReactMarkdown>
</div>
) )
.text },
} )}
</ReactMarkdown>
</div> </div>
) )
} }

View File

@@ -44,6 +44,13 @@ const STORAGE_TPM_MINUTE_KEY = "next-ai-draw-io-tpm-minute"
import { useDiagram } from "@/contexts/diagram-context" import { useDiagram } from "@/contexts/diagram-context"
import { findCachedResponse } from "@/lib/cached-responses" import { findCachedResponse } from "@/lib/cached-responses"
import {
extractPdfText,
extractTextFileContent,
isPdfFile,
isTextFile,
MAX_EXTRACTED_CHARS,
} from "@/lib/pdf-utils"
import { formatXML, wrapWithMxFile } from "@/lib/utils" import { formatXML, wrapWithMxFile } from "@/lib/utils"
import { ChatMessageDisplay } from "./chat-message-display" import { ChatMessageDisplay } from "./chat-message-display"
@@ -105,6 +112,10 @@ export default function ChatPanel({
} }
const [files, setFiles] = useState<File[]>([]) const [files, setFiles] = useState<File[]>([])
// Store extracted text for attached PDF and text files, along with extraction status
const [pdfData, setPdfData] = useState<
Map<File, { text: string; charCount: number; isExtracting: boolean }>
>(new Map())
const [showHistory, setShowHistory] = useState(false) const [showHistory, setShowHistory] = useState(false)
const [showSettingsDialog, setShowSettingsDialog] = useState(false) const [showSettingsDialog, setShowSettingsDialog] = useState(false)
const [, setAccessCodeRequired] = useState(false) const [, setAccessCodeRequired] = useState(false)
@@ -711,11 +722,28 @@ Please retry with an adjusted search pattern or use display_diagram if retries a
// Add user message and fake assistant response to messages // Add user message and fake assistant response to messages
// The chat-message-display useEffect will handle displaying the diagram // The chat-message-display useEffect will handle displaying the diagram
const toolCallId = `cached-${Date.now()}` const toolCallId = `cached-${Date.now()}`
// Build user message text including any file content
let userText = input
for (const file of files) {
if (isPdfFile(file)) {
const extracted = pdfData.get(file)
if (extracted?.text) {
userText += `\n\n[PDF: ${file.name}]\n${extracted.text}`
}
} else if (isTextFile(file)) {
const extracted = pdfData.get(file)
if (extracted?.text) {
userText += `\n\n[File: ${file.name}]\n${extracted.text}`
}
}
}
setMessages([ setMessages([
{ {
id: `user-${Date.now()}`, id: `user-${Date.now()}`,
role: "user" as const, role: "user" as const,
parts: [{ type: "text" as const, text: input }], parts: [{ type: "text" as const, text: userText }],
}, },
{ {
id: `assistant-${Date.now()}`, id: `assistant-${Date.now()}`,
@@ -745,25 +773,48 @@ Please retry with an adjusted search pattern or use display_diagram if retries a
// This ensures edit_diagram has the correct XML before AI responds // This ensures edit_diagram has the correct XML before AI responds
chartXMLRef.current = chartXml chartXMLRef.current = chartXml
const parts: any[] = [{ type: "text", text: input }] // Build user text by concatenating input with pre-extracted text
// (Backend only reads first text part, so we must combine them)
let userText = input
const parts: any[] = []
if (files.length > 0) { if (files.length > 0) {
for (const file of files) { for (const file of files) {
const reader = new FileReader() if (isPdfFile(file)) {
const dataUrl = await new Promise<string>((resolve) => { // Use pre-extracted PDF text from pdfData
reader.onload = () => const extracted = pdfData.get(file)
resolve(reader.result as string) if (extracted?.text) {
reader.readAsDataURL(file) userText += `\n\n[PDF: ${file.name}]\n${extracted.text}`
}) }
} else if (isTextFile(file)) {
// Use pre-extracted text file content from pdfData
const extracted = pdfData.get(file)
if (extracted?.text) {
userText += `\n\n[File: ${file.name}]\n${extracted.text}`
}
} else {
// Handle as image
const reader = new FileReader()
const dataUrl = await new Promise<string>(
(resolve) => {
reader.onload = () =>
resolve(reader.result as string)
reader.readAsDataURL(file)
},
)
parts.push({ parts.push({
type: "file", type: "file",
url: dataUrl, url: dataUrl,
mediaType: file.type, mediaType: file.type,
}) })
}
} }
} }
// Add the combined text as the first part
parts.unshift({ type: "text", text: userText })
// Get previous XML from the last snapshot (before this message) // Get previous XML from the last snapshot (before this message)
const snapshotKeys = Array.from( const snapshotKeys = Array.from(
xmlSnapshotsRef.current.keys(), xmlSnapshotsRef.current.keys(),
@@ -843,8 +894,81 @@ Please retry with an adjusted search pattern or use display_diagram if retries a
setInput(e.target.value) setInput(e.target.value)
} }
/**
 * Replaces the attachment list and extracts text from newly added PDF/text files.
 *
 * For each PDF or text file without an entry in `pdfData`, the file is first
 * marked as extracting, then its text is read with `extractPdfText` or
 * `extractTextFileContent`. Content longer than `MAX_EXTRACTED_CHARS` is
 * rejected with a toast and the file is removed. Finally, `pdfData` entries
 * whose file is no longer in `newFiles` are pruned.
 *
 * @param newFiles - The complete attachment list after the change.
 */
const handleFileChange = async (newFiles: File[]) => {
    setFiles(newFiles)

    // Removes an unusable file from both the extraction map and the attachment list
    const discardFile = (file: File) => {
        setPdfData((prev) => {
            const next = new Map(prev)
            next.delete(file)
            return next
        })
        setFiles((prev) => prev.filter((f) => f !== file))
    }

    // Extract text immediately for new PDF/text files
    for (const file of newFiles) {
        const needsExtraction =
            (isPdfFile(file) || isTextFile(file)) && !pdfData.has(file)
        if (!needsExtraction) {
            continue
        }

        // Mark as extracting so the preview can show a loading state
        setPdfData((prev) => {
            const next = new Map(prev)
            next.set(file, {
                text: "",
                charCount: 0,
                isExtracting: true,
            })
            return next
        })

        try {
            const text = isPdfFile(file)
                ? await extractPdfText(file)
                : await extractTextFileContent(file)

            // Enforce the character limit on extracted content
            if (text.length > MAX_EXTRACTED_CHARS) {
                const limitK = MAX_EXTRACTED_CHARS / 1000
                toast.error(
                    `${file.name}: Content exceeds ${limitK}k character limit (${(text.length / 1000).toFixed(1)}k chars)`,
                )
                discardFile(file)
                continue
            }

            setPdfData((prev) => {
                const next = new Map(prev)
                next.set(file, {
                    text,
                    charCount: text.length,
                    isExtracting: false,
                })
                return next
            })
        } catch (error) {
            console.error("Failed to extract text:", error)
            toast.error(`Failed to read file: ${file.name}`)
            // Remove the unreadable file too. Otherwise it stays in the list
            // with no extracted text and is silently left out of the message.
            discardFile(file)
        }
    }

    // Clean up pdfData for removed files
    setPdfData((prev) => {
        const next = new Map(prev)
        for (const key of prev.keys()) {
            if (!newFiles.includes(key)) {
                next.delete(key)
            }
        }
        return next
    })
}
const handleRegenerate = async (messageIndex: number) => { const handleRegenerate = async (messageIndex: number) => {
@@ -1228,6 +1352,7 @@ Please retry with an adjusted search pattern or use display_diagram if retries a
}} }}
files={files} files={files}
onFileChange={handleFileChange} onFileChange={handleFileChange}
pdfData={pdfData}
showHistory={showHistory} showHistory={showHistory}
onToggleHistory={setShowHistory} onToggleHistory={setShowHistory}
sessionId={sessionId} sessionId={sessionId}

View File

@@ -1,15 +1,31 @@
"use client" "use client"
import { X } from "lucide-react" import { FileCode, FileText, Loader2, X } from "lucide-react"
import Image from "next/image" import Image from "next/image"
import { useEffect, useRef, useState } from "react" import { useEffect, useRef, useState } from "react"
import { isPdfFile, isTextFile } from "@/lib/pdf-utils"
/**
 * Formats a character count for display.
 *
 * Counts of 1000 or more are shown in thousands with one decimal place and a
 * `k` suffix (e.g. `1.5k`); smaller counts are shown as-is.
 */
function formatCharCount(count: number): string {
    return count >= 1000 ? `${(count / 1000).toFixed(1)}k` : String(count)
}
interface FilePreviewListProps { interface FilePreviewListProps {
files: File[] files: File[]
onRemoveFile: (fileToRemove: File) => void onRemoveFile: (fileToRemove: File) => void
pdfData?: Map<
File,
{ text: string; charCount: number; isExtracting: boolean }
>
} }
export function FilePreviewList({ files, onRemoveFile }: FilePreviewListProps) { export function FilePreviewList({
files,
onRemoveFile,
pdfData = new Map(),
}: FilePreviewListProps) {
const [selectedImage, setSelectedImage] = useState<string | null>(null) const [selectedImage, setSelectedImage] = useState<string | null>(null)
const [imageUrls, setImageUrls] = useState<Map<File, string>>(new Map()) const [imageUrls, setImageUrls] = useState<Map<File, string>>(new Map())
const imageUrlsRef = useRef<Map<File, string>>(new Map()) const imageUrlsRef = useRef<Map<File, string>>(new Map())
@@ -70,12 +86,19 @@ export function FilePreviewList({ files, onRemoveFile }: FilePreviewListProps) {
<div className="flex flex-wrap gap-2 mt-2 p-2 bg-muted/50 rounded-md"> <div className="flex flex-wrap gap-2 mt-2 p-2 bg-muted/50 rounded-md">
{files.map((file, index) => { {files.map((file, index) => {
const imageUrl = imageUrls.get(file) || null const imageUrl = imageUrls.get(file) || null
const pdfInfo = pdfData.get(file)
return ( return (
<div key={file.name + index} className="relative group"> <div key={file.name + index} className="relative group">
<div <div
className="w-20 h-20 border rounded-md overflow-hidden bg-muted cursor-pointer" className={`w-20 h-20 border rounded-md overflow-hidden bg-muted ${
file.type.startsWith("image/") && imageUrl
? "cursor-pointer"
: ""
}`}
onClick={() => onClick={() =>
imageUrl && setSelectedImage(imageUrl) file.type.startsWith("image/") &&
imageUrl &&
setSelectedImage(imageUrl)
} }
> >
{file.type.startsWith("image/") && imageUrl ? ( {file.type.startsWith("image/") && imageUrl ? (
@@ -87,6 +110,33 @@ export function FilePreviewList({ files, onRemoveFile }: FilePreviewListProps) {
className="object-cover w-full h-full" className="object-cover w-full h-full"
unoptimized unoptimized
/> />
) : isPdfFile(file) || isTextFile(file) ? (
<div className="flex flex-col items-center justify-center h-full p-1">
{pdfInfo?.isExtracting ? (
<Loader2 className="h-6 w-6 text-blue-500 mb-1 animate-spin" />
) : isPdfFile(file) ? (
<FileText className="h-6 w-6 text-red-500 mb-1" />
) : (
<FileCode className="h-6 w-6 text-blue-500 mb-1" />
)}
<span className="text-xs text-center truncate w-full px-1">
{file.name.length > 10
? `${file.name.slice(0, 7)}...`
: file.name}
</span>
{pdfInfo?.isExtracting ? (
<span className="text-[10px] text-muted-foreground">
Reading...
</span>
) : pdfInfo?.charCount ? (
<span className="text-[10px] text-green-600 font-medium">
{formatCharCount(
pdfInfo.charCount,
)}{" "}
chars
</span>
) : null}
</div>
) : ( ) : (
<div className="flex items-center justify-center h-full text-xs text-center p-1"> <div className="flex items-center justify-center h-full text-xs text-center p-1">
{file.name} {file.name}

View File

@@ -64,3 +64,8 @@ AI_MODEL=global.anthropic.claude-sonnet-4-5-20250929-v1:0
# Draw.io Configuration (Optional) # Draw.io Configuration (Optional)
# NEXT_PUBLIC_DRAWIO_BASE_URL=https://embed.diagrams.net # Default: https://embed.diagrams.net # NEXT_PUBLIC_DRAWIO_BASE_URL=https://embed.diagrams.net # Default: https://embed.diagrams.net
# Use this to point to a self-hosted draw.io instance # Use this to point to a self-hosted draw.io instance
# PDF Input Feature (Optional)
# Enable PDF file upload to extract text and generate diagrams
# Enabled by default. Set to "false" to disable.
# ENABLE_PDF_INPUT=true

View File

@@ -394,6 +394,366 @@ export const CACHED_EXAMPLE_RESPONSES: CachedResponse[] = [
</mxCell> </mxCell>
</root>`, </root>`,
}, },
{
promptText: "Summarize this paper as a diagram",
hasImage: true,
xml: ` <root>
<mxCell id="0" />
<mxCell id="1" parent="0" />
<mxCell id="title_bg" parent="1"
style="rounded=1;whiteSpace=wrap;html=1;fillColor=#1a237e;strokeColor=none;arcSize=8;"
value="" vertex="1">
<mxGeometry height="80" width="720" x="40" y="20" as="geometry" />
</mxCell>
<mxCell id="title" parent="1"
style="text;html=1;strokeColor=none;fillColor=none;align=center;verticalAlign=middle;whiteSpace=wrap;rounded=0;fontSize=22;fontStyle=1;fontColor=#FFFFFF;"
value="Chain-of-Thought Prompting&lt;br&gt;&lt;font style=&quot;font-size: 14px;&quot;&gt;Elicits Reasoning in Large Language Models&lt;/font&gt;"
vertex="1">
<mxGeometry height="70" width="720" x="40" y="25" as="geometry" />
</mxCell>
<mxCell id="authors" parent="1"
style="text;html=1;strokeColor=none;fillColor=none;align=center;verticalAlign=middle;whiteSpace=wrap;rounded=0;fontSize=11;fontColor=#666666;"
value="Wei et al. (Google Research, Brain Team) | NeurIPS 2022" vertex="1">
<mxGeometry height="20" width="720" x="40" y="100" as="geometry" />
</mxCell>
<mxCell id="core_header" parent="1"
style="text;html=1;strokeColor=none;fillColor=none;align=left;verticalAlign=middle;whiteSpace=wrap;rounded=0;fontSize=16;fontStyle=1;fontColor=#1a237e;"
value="💡 Core Idea" vertex="1">
<mxGeometry height="30" width="150" x="40" y="125" as="geometry" />
</mxCell>
<mxCell id="core_box" parent="1"
style="rounded=1;whiteSpace=wrap;html=1;fillColor=#E3F2FD;strokeColor=#1565C0;align=left;spacingLeft=10;spacingRight=10;fontSize=11;"
value="&lt;b&gt;Chain of Thought&lt;/b&gt; = A series of intermediate reasoning steps that lead to the final answer&lt;br&gt;&lt;br&gt;Simply provide a few CoT demonstrations as exemplars in few-shot prompting"
vertex="1">
<mxGeometry height="75" width="340" x="40" y="155" as="geometry" />
</mxCell>
<mxCell id="compare_header" parent="1"
style="text;html=1;strokeColor=none;fillColor=none;align=left;verticalAlign=middle;whiteSpace=wrap;rounded=0;fontSize=16;fontStyle=1;fontColor=#1a237e;"
value="⚖️ Standard vs Chain-of-Thought Prompting" vertex="1">
<mxGeometry height="30" width="350" x="40" y="240" as="geometry" />
</mxCell>
<mxCell id="std_box" parent="1"
style="rounded=1;whiteSpace=wrap;html=1;fillColor=#FFEBEE;strokeColor=#C62828;arcSize=8;"
value="" vertex="1">
<mxGeometry height="160" width="170" x="40" y="275" as="geometry" />
</mxCell>
<mxCell id="std_title" parent="1"
style="text;html=1;strokeColor=none;fillColor=none;align=center;verticalAlign=middle;whiteSpace=wrap;rounded=0;fontSize=12;fontStyle=1;fontColor=#C62828;"
value="Standard Prompting" vertex="1">
<mxGeometry height="25" width="170" x="40" y="280" as="geometry" />
</mxCell>
<mxCell id="std_q" parent="1"
style="text;html=1;strokeColor=none;fillColor=none;align=left;verticalAlign=top;whiteSpace=wrap;rounded=0;fontSize=9;spacingLeft=5;spacingRight=5;"
value="Q: Roger has 5 tennis balls. He buys 2 more cans. Each can has 3 balls. How many now?"
vertex="1">
<mxGeometry height="55" width="160" x="45" y="305" as="geometry" />
</mxCell>
<mxCell id="std_a" parent="1"
style="text;html=1;strokeColor=none;fillColor=#FFCDD2;align=left;verticalAlign=middle;whiteSpace=wrap;rounded=1;fontSize=10;fontStyle=1;spacingLeft=5;"
value="A: The answer is 11." vertex="1">
<mxGeometry height="25" width="150" x="50" y="365" as="geometry" />
</mxCell>
<mxCell id="std_result" parent="1"
style="text;html=1;strokeColor=none;fillColor=none;align=center;verticalAlign=middle;whiteSpace=wrap;rounded=0;fontSize=11;fontStyle=1;fontColor=#C62828;"
value="❌ Often Wrong" vertex="1">
<mxGeometry height="30" width="170" x="40" y="400" as="geometry" />
</mxCell>
<mxCell id="cot_box" parent="1"
style="rounded=1;whiteSpace=wrap;html=1;fillColor=#E8F5E9;strokeColor=#2E7D32;arcSize=8;"
value="" vertex="1">
<mxGeometry height="160" width="170" x="220" y="275" as="geometry" />
</mxCell>
<mxCell id="cot_title" parent="1"
style="text;html=1;strokeColor=none;fillColor=none;align=center;verticalAlign=middle;whiteSpace=wrap;rounded=0;fontSize=12;fontStyle=1;fontColor=#2E7D32;"
value="Chain-of-Thought" vertex="1">
<mxGeometry height="25" width="170" x="220" y="280" as="geometry" />
</mxCell>
<mxCell id="cot_q" parent="1"
style="text;html=1;strokeColor=none;fillColor=none;align=left;verticalAlign=top;whiteSpace=wrap;rounded=0;fontSize=9;spacingLeft=5;spacingRight=5;"
value="Q: Roger has 5 tennis balls. He buys 2 more cans. Each can has 3 balls. How many now?"
vertex="1">
<mxGeometry height="55" width="160" x="225" y="305" as="geometry" />
</mxCell>
<mxCell id="cot_a" parent="1"
style="text;html=1;strokeColor=none;fillColor=#C8E6C9;align=left;verticalAlign=middle;whiteSpace=wrap;rounded=1;fontSize=9;fontStyle=1;spacingLeft=5;"
value="A: 2 cans × 3 = 6 balls.&lt;br&gt;5 + 6 = 11. Answer: 11" vertex="1">
<mxGeometry height="35" width="150" x="230" y="360" as="geometry" />
</mxCell>
<mxCell id="cot_result" parent="1"
style="text;html=1;strokeColor=none;fillColor=none;align=center;verticalAlign=middle;whiteSpace=wrap;rounded=0;fontSize=11;fontStyle=1;fontColor=#2E7D32;"
value="✓ Correct!" vertex="1">
<mxGeometry height="30" width="170" x="220" y="400" as="geometry" />
</mxCell>
<mxCell id="vs_arrow" edge="1" parent="1"
style="shape=flexArrow;endArrow=classic;startArrow=classic;html=1;fillColor=#FFC107;strokeColor=none;width=8;endSize=4;startSize=4;"
value="">
<mxGeometry relative="1" width="100" as="geometry">
<mxPoint x="195" y="355" as="sourcePoint" />
<mxPoint x="235" y="355" as="targetPoint" />
</mxGeometry>
</mxCell>
<mxCell id="props_header" parent="1"
style="text;html=1;strokeColor=none;fillColor=none;align=left;verticalAlign=middle;whiteSpace=wrap;rounded=0;fontSize=16;fontStyle=1;fontColor=#1a237e;"
value="🔑 Key Properties" vertex="1">
<mxGeometry height="30" width="150" x="400" y="125" as="geometry" />
</mxCell>
<mxCell id="prop1" parent="1"
style="rounded=1;whiteSpace=wrap;html=1;fillColor=#FFF3E0;strokeColor=#EF6C00;fontSize=10;align=left;spacingLeft=8;"
value="1⃣ Decomposes multi-step problems" vertex="1">
<mxGeometry height="32" width="180" x="400" y="155" as="geometry" />
</mxCell>
<mxCell id="prop2" parent="1"
style="rounded=1;whiteSpace=wrap;html=1;fillColor=#FFF3E0;strokeColor=#EF6C00;fontSize=10;align=left;spacingLeft=8;"
value="2⃣ Interpretable reasoning window" vertex="1">
<mxGeometry height="32" width="180" x="400" y="192" as="geometry" />
</mxCell>
<mxCell id="prop3" parent="1"
style="rounded=1;whiteSpace=wrap;html=1;fillColor=#FFF3E0;strokeColor=#EF6C00;fontSize=10;align=left;spacingLeft=8;"
value="3⃣ Applicable to any language task" vertex="1">
<mxGeometry height="32" width="180" x="400" y="229" as="geometry" />
</mxCell>
<mxCell id="prop4" parent="1"
style="rounded=1;whiteSpace=wrap;html=1;fillColor=#FFF3E0;strokeColor=#EF6C00;fontSize=10;align=left;spacingLeft=8;"
value="4⃣ No finetuning required" vertex="1">
<mxGeometry height="32" width="180" x="400" y="266" as="geometry" />
</mxCell>
<mxCell id="emergent_header" parent="1"
style="text;html=1;strokeColor=none;fillColor=none;align=left;verticalAlign=middle;whiteSpace=wrap;rounded=0;fontSize=16;fontStyle=1;fontColor=#1a237e;"
value="📈 Emergent Ability" vertex="1">
<mxGeometry height="30" width="180" x="400" y="310" as="geometry" />
</mxCell>
<mxCell id="emergent_box" parent="1"
style="rounded=1;whiteSpace=wrap;html=1;fillColor=#F3E5F5;strokeColor=#7B1FA2;arcSize=8;"
value="" vertex="1">
<mxGeometry height="95" width="180" x="400" y="340" as="geometry" />
</mxCell>
<mxCell id="emergent_text" parent="1"
style="text;html=1;strokeColor=none;fillColor=none;align=center;verticalAlign=middle;whiteSpace=wrap;rounded=0;fontSize=11;"
value="CoT only works with&lt;br&gt;&lt;b&gt;~100B+ parameters&lt;/b&gt;&lt;br&gt;&lt;br&gt;Small models produce&lt;br&gt;fluent but illogical chains"
vertex="1">
<mxGeometry height="85" width="180" x="400" y="345" as="geometry" />
</mxCell>
<mxCell id="results_header" parent="1"
style="text;html=1;strokeColor=none;fillColor=none;align=left;verticalAlign=middle;whiteSpace=wrap;rounded=0;fontSize=16;fontStyle=1;fontColor=#1a237e;"
value="📊 Key Results" vertex="1">
<mxGeometry height="30" width="150" x="600" y="125" as="geometry" />
</mxCell>
<mxCell id="gsm_box" parent="1"
style="rounded=1;whiteSpace=wrap;html=1;fillColor=#E8F5E9;strokeColor=#2E7D32;arcSize=8;"
value="" vertex="1">
<mxGeometry height="100" width="160" x="600" y="155" as="geometry" />
</mxCell>
<mxCell id="gsm_title" parent="1"
style="text;html=1;strokeColor=none;fillColor=none;align=center;verticalAlign=middle;whiteSpace=wrap;rounded=0;fontSize=12;fontStyle=1;fontColor=#2E7D32;"
value="GSM8K (Math)" vertex="1">
<mxGeometry height="20" width="160" x="600" y="160" as="geometry" />
</mxCell>
<mxCell id="gsm_bar1" parent="1"
style="rounded=0;whiteSpace=wrap;html=1;fillColor=#FFCDD2;strokeColor=none;"
value="" vertex="1">
<mxGeometry height="30" width="40" x="615" y="185" as="geometry" />
</mxCell>
<mxCell id="gsm_bar2" parent="1"
style="rounded=0;whiteSpace=wrap;html=1;fillColor=#4CAF50;strokeColor=none;"
value="" vertex="1">
<mxGeometry height="30" width="80" x="665" y="185" as="geometry" />
</mxCell>
<mxCell id="gsm_label1" parent="1"
style="text;html=1;strokeColor=none;fillColor=none;align=center;verticalAlign=middle;whiteSpace=wrap;rounded=0;fontSize=10;fontStyle=1;"
value="18%" vertex="1">
<mxGeometry height="15" width="40" x="615" y="215" as="geometry" />
</mxCell>
<mxCell id="gsm_label2" parent="1"
style="text;html=1;strokeColor=none;fillColor=none;align=center;verticalAlign=middle;whiteSpace=wrap;rounded=0;fontSize=10;fontStyle=1;fontColor=#2E7D32;"
value="57%" vertex="1">
<mxGeometry height="15" width="80" x="665" y="215" as="geometry" />
</mxCell>
<mxCell id="gsm_legend" parent="1"
style="text;html=1;strokeColor=none;fillColor=none;align=center;verticalAlign=middle;whiteSpace=wrap;rounded=0;fontSize=9;fontColor=#666666;"
value="Standard → CoT (PaLM 540B)" vertex="1">
<mxGeometry height="20" width="160" x="600" y="232" as="geometry" />
</mxCell>
<mxCell id="bench_header" parent="1"
style="text;html=1;strokeColor=none;fillColor=none;align=left;verticalAlign=middle;whiteSpace=wrap;rounded=0;fontSize=16;fontStyle=1;fontColor=#1a237e;"
value="🧪 Benchmarks Tested" vertex="1">
<mxGeometry height="30" width="180" x="600" y="265" as="geometry" />
</mxCell>
<mxCell id="bench_arith" parent="1"
style="rounded=1;whiteSpace=wrap;html=1;fillColor=#E3F2FD;strokeColor=#1565C0;fontSize=10;align=center;"
value="🔢 Arithmetic&lt;br&gt;&lt;font style=&quot;font-size: 9px;&quot;&gt;GSM8K, SVAMP, ASDiv, AQuA, MAWPS&lt;/font&gt;"
vertex="1">
<mxGeometry height="45" width="160" x="600" y="295" as="geometry" />
</mxCell>
<mxCell id="bench_common" parent="1"
style="rounded=1;whiteSpace=wrap;html=1;fillColor=#E3F2FD;strokeColor=#1565C0;fontSize=10;align=center;"
value="🧠 Commonsense&lt;br&gt;&lt;font style=&quot;font-size: 9px;&quot;&gt;CSQA, StrategyQA, Date, Sports, SayCan&lt;/font&gt;"
vertex="1">
<mxGeometry height="45" width="160" x="600" y="345" as="geometry" />
</mxCell>
<mxCell id="bench_symbol" parent="1"
style="rounded=1;whiteSpace=wrap;html=1;fillColor=#E3F2FD;strokeColor=#1565C0;fontSize=10;align=center;"
value="🔣 Symbolic&lt;br&gt;&lt;font style=&quot;font-size: 9px;&quot;&gt;Last Letter Concat, Coin Flip&lt;/font&gt;"
vertex="1">
<mxGeometry height="40" width="160" x="600" y="395" as="geometry" />
</mxCell>
<mxCell id="task_header" parent="1"
style="text;html=1;strokeColor=none;fillColor=none;align=left;verticalAlign=middle;whiteSpace=wrap;rounded=0;fontSize=16;fontStyle=1;fontColor=#1a237e;"
value="🎯 Task Types &amp; Results" vertex="1">
<mxGeometry height="30" width="200" x="40" y="445" as="geometry" />
</mxCell>
<mxCell id="task_arith" parent="1"
style="ellipse;whiteSpace=wrap;html=1;fillColor=#BBDEFB;strokeColor=#1565C0;fontSize=11;fontStyle=1;"
value="Arithmetic&lt;br&gt;Reasoning" vertex="1">
<mxGeometry height="60" width="90" x="40" y="480" as="geometry" />
</mxCell>
<mxCell id="task_arith_res" parent="1"
style="text;html=1;strokeColor=none;fillColor=none;align=center;verticalAlign=top;whiteSpace=wrap;rounded=0;fontSize=9;fontColor=#1565C0;"
value="SOTA on GSM8K&lt;br&gt;(57% vs 55% prior)" vertex="1">
<mxGeometry height="30" width="110" x="30" y="540" as="geometry" />
</mxCell>
<mxCell id="task_common" parent="1"
style="ellipse;whiteSpace=wrap;html=1;fillColor=#C8E6C9;strokeColor=#2E7D32;fontSize=11;fontStyle=1;"
value="Commonsense&lt;br&gt;Reasoning" vertex="1">
<mxGeometry height="60" width="90" x="160" y="480" as="geometry" />
</mxCell>
<mxCell id="task_common_res" parent="1"
style="text;html=1;strokeColor=none;fillColor=none;align=center;verticalAlign=top;whiteSpace=wrap;rounded=0;fontSize=9;fontColor=#2E7D32;"
value="SOTA StrategyQA&lt;br&gt;(75.6% vs 69.4%)" vertex="1">
<mxGeometry height="30" width="110" x="150" y="540" as="geometry" />
</mxCell>
<mxCell id="task_symbol" parent="1"
style="ellipse;whiteSpace=wrap;html=1;fillColor=#FFE0B2;strokeColor=#EF6C00;fontSize=11;fontStyle=1;"
value="Symbolic&lt;br&gt;Reasoning" vertex="1">
<mxGeometry height="60" width="90" x="280" y="480" as="geometry" />
</mxCell>
<mxCell id="task_symbol_res" parent="1"
style="text;html=1;strokeColor=none;fillColor=none;align=center;verticalAlign=top;whiteSpace=wrap;rounded=0;fontSize=9;fontColor=#EF6C00;"
value="OOD Generalization&lt;br&gt;to longer sequences" vertex="1">
<mxGeometry height="30" width="110" x="270" y="540" as="geometry" />
</mxCell>
<mxCell id="task_arrow1" edge="1" parent="1"
style="endArrow=classic;html=1;strokeColor=#9E9E9E;strokeWidth=2;" value="">
<mxGeometry height="50" relative="1" width="50" as="geometry">
<mxPoint x="130" y="510" as="sourcePoint" />
<mxPoint x="160" y="510" as="targetPoint" />
</mxGeometry>
</mxCell>
<mxCell id="task_arrow2" edge="1" parent="1"
style="endArrow=classic;html=1;strokeColor=#9E9E9E;strokeWidth=2;" value="">
<mxGeometry height="50" relative="1" width="50" as="geometry">
<mxPoint x="250" y="510" as="sourcePoint" />
<mxPoint x="280" y="510" as="targetPoint" />
</mxGeometry>
</mxCell>
<mxCell id="models_header" parent="1"
style="text;html=1;strokeColor=none;fillColor=none;align=left;verticalAlign=middle;whiteSpace=wrap;rounded=0;fontSize=16;fontStyle=1;fontColor=#1a237e;"
value="🤖 Models Tested" vertex="1">
<mxGeometry height="30" width="150" x="400" y="445" as="geometry" />
</mxCell>
<mxCell id="models_box" parent="1"
style="rounded=1;whiteSpace=wrap;html=1;fillColor=#ECEFF1;strokeColor=#607D8B;arcSize=8;"
value="" vertex="1">
<mxGeometry height="95" width="180" x="400" y="475" as="geometry" />
</mxCell>
<mxCell id="model1" parent="1"
style="text;html=1;strokeColor=none;fillColor=none;align=left;verticalAlign=middle;whiteSpace=wrap;rounded=0;fontSize=11;spacingLeft=10;"
value="• GPT-3 (175B)" vertex="1">
<mxGeometry height="20" width="90" x="400" y="480" as="geometry" />
</mxCell>
<mxCell id="model2" parent="1"
style="text;html=1;strokeColor=none;fillColor=none;align=left;verticalAlign=middle;whiteSpace=wrap;rounded=0;fontSize=11;spacingLeft=10;"
value="• LaMDA (137B)" vertex="1">
<mxGeometry height="20" width="90" x="400" y="500" as="geometry" />
</mxCell>
<mxCell id="model3" parent="1"
style="text;html=1;strokeColor=none;fillColor=none;align=left;verticalAlign=middle;whiteSpace=wrap;rounded=0;fontSize=11;spacingLeft=10;"
value="• PaLM (540B)" vertex="1">
<mxGeometry height="20" width="90" x="400" y="520" as="geometry" />
</mxCell>
<mxCell id="model4" parent="1"
style="text;html=1;strokeColor=none;fillColor=none;align=left;verticalAlign=middle;whiteSpace=wrap;rounded=0;fontSize=11;spacingLeft=10;"
value="• Codex" vertex="1">
<mxGeometry height="20" width="80" x="490" y="480" as="geometry" />
</mxCell>
<mxCell id="model5" parent="1"
style="text;html=1;strokeColor=none;fillColor=none;align=left;verticalAlign=middle;whiteSpace=wrap;rounded=0;fontSize=11;spacingLeft=10;"
value="• UL2 (20B)" vertex="1">
<mxGeometry height="20" width="80" x="490" y="500" as="geometry" />
</mxCell>
<mxCell id="model_note" parent="1"
style="text;html=1;strokeColor=none;fillColor=none;align=center;verticalAlign=middle;whiteSpace=wrap;rounded=0;fontSize=10;fontStyle=2;fontColor=#607D8B;"
value="No finetuning - prompting only!" vertex="1">
<mxGeometry height="20" width="180" x="400" y="545" as="geometry" />
</mxCell>
<mxCell id="takeaway_header" parent="1"
style="text;html=1;strokeColor=none;fillColor=none;align=left;verticalAlign=middle;whiteSpace=wrap;rounded=0;fontSize=16;fontStyle=1;fontColor=#1a237e;"
value="✨ Key Takeaways" vertex="1">
<mxGeometry height="30" width="160" x="600" y="445" as="geometry" />
</mxCell>
<mxCell id="takeaway_box" parent="1"
style="rounded=1;whiteSpace=wrap;html=1;fillColor=#FFF8E1;strokeColor=#FFA000;arcSize=8;"
value="" vertex="1">
<mxGeometry height="95" width="160" x="600" y="475" as="geometry" />
</mxCell>
<mxCell id="take1" parent="1"
style="text;html=1;strokeColor=none;fillColor=none;align=left;verticalAlign=middle;whiteSpace=wrap;rounded=0;fontSize=10;spacingLeft=5;"
value="✓ Simple yet powerful" vertex="1">
<mxGeometry height="18" width="150" x="605" y="480" as="geometry" />
</mxCell>
<mxCell id="take2" parent="1"
style="text;html=1;strokeColor=none;fillColor=none;align=left;verticalAlign=middle;whiteSpace=wrap;rounded=0;fontSize=10;spacingLeft=5;"
value="✓ Emergent at scale" vertex="1">
<mxGeometry height="18" width="150" x="605" y="498" as="geometry" />
</mxCell>
<mxCell id="take3" parent="1"
style="text;html=1;strokeColor=none;fillColor=none;align=left;verticalAlign=middle;whiteSpace=wrap;rounded=0;fontSize=10;spacingLeft=5;"
value="✓ Broadly applicable" vertex="1">
<mxGeometry height="18" width="150" x="605" y="516" as="geometry" />
</mxCell>
<mxCell id="take4" parent="1"
style="text;html=1;strokeColor=none;fillColor=none;align=left;verticalAlign=middle;whiteSpace=wrap;rounded=0;fontSize=10;spacingLeft=5;"
value="✓ No training needed" vertex="1">
<mxGeometry height="18" width="150" x="605" y="534" as="geometry" />
</mxCell>
<mxCell id="take5" parent="1"
style="text;html=1;strokeColor=none;fillColor=none;align=left;verticalAlign=middle;whiteSpace=wrap;rounded=0;fontSize=10;spacingLeft=5;"
value="✓ State-of-the-art results" vertex="1">
<mxGeometry height="18" width="150" x="605" y="552" as="geometry" />
</mxCell>
<mxCell id="format_header" parent="1"
style="text;html=1;strokeColor=none;fillColor=none;align=left;verticalAlign=middle;whiteSpace=wrap;rounded=0;fontSize=14;fontStyle=1;fontColor=#1a237e;"
value="📝 Prompt Format" vertex="1">
<mxGeometry height="25" width="150" x="40" y="575" as="geometry" />
</mxCell>
<mxCell id="format_box" parent="1"
style="rounded=1;whiteSpace=wrap;html=1;fillColor=#E1BEE7;strokeColor=#7B1FA2;fontSize=12;fontStyle=1;"
value="〈 Input, Chain of Thought, Output 〉" vertex="1">
<mxGeometry height="35" width="250" x="40" y="600" as="geometry" />
</mxCell>
<mxCell id="limit_header" parent="1"
style="text;html=1;strokeColor=none;fillColor=none;align=left;verticalAlign=middle;whiteSpace=wrap;rounded=0;fontSize=14;fontStyle=1;fontColor=#1a237e;"
value="⚠️ Limitations" vertex="1">
<mxGeometry height="25" width="120" x="310" y="575" as="geometry" />
</mxCell>
<mxCell id="limit_box" parent="1"
style="rounded=1;whiteSpace=wrap;html=1;fillColor=#FFEBEE;strokeColor=#C62828;fontSize=10;align=left;spacingLeft=8;"
value="• Requires large models (~100B+)&lt;br&gt;• No guarantee of correct reasoning&lt;br&gt;• Costly to serve in production"
vertex="1">
<mxGeometry height="55" width="200" x="310" y="600" as="geometry" />
</mxCell>
<mxCell id="impact_header" parent="1"
style="text;html=1;strokeColor=none;fillColor=none;align=left;verticalAlign=middle;whiteSpace=wrap;rounded=0;fontSize=14;fontStyle=1;fontColor=#1a237e;"
value="🚀 Impact" vertex="1">
<mxGeometry height="25" width="100" x="530" y="575" as="geometry" />
</mxCell>
<mxCell id="impact_box" parent="1"
style="rounded=1;whiteSpace=wrap;html=1;fillColor=#E8F5E9;strokeColor=#2E7D32;fontSize=10;align=left;spacingLeft=8;spacingRight=8;"
value="Foundational technique for modern LLM reasoning - inspired many follow-up works including Self-Consistency, Tree-of-Thought, etc."
vertex="1">
<mxGeometry height="55" width="230" x="530" y="600" as="geometry" />
</mxCell>
</root>`,
},
{ {
promptText: "Draw a cat for me", promptText: "Draw a cat for me",
hasImage: false, hasImage: false,

72
lib/pdf-utils.ts Normal file
View File

@@ -0,0 +1,72 @@
import { extractText, getDocumentProxy } from "unpdf"
/** Upper bound on the number of characters allowed for extracted text (150k). */
export const MAX_EXTRACTED_CHARS = 150_000
// File-name suffixes (lower-case, with leading dot) that we treat as text files
const TEXT_EXTENSIONS = [
    // Documents and structured data
    ".txt", ".md", ".markdown", ".json", ".csv", ".xml",
    // Web sources
    ".html", ".css", ".js", ".ts", ".jsx", ".tsx",
    // Other programming languages
    ".py", ".java", ".c", ".cpp", ".h", ".go", ".rs",
    // Configuration and logs
    ".yaml", ".yml", ".toml", ".ini", ".log",
    // Shell scripts
    ".sh", ".bash", ".zsh",
]
/**
 * Read a PDF file and return its text content.
 *
 * The file's bytes are handed to the unpdf library (client-side extraction),
 * which is asked to merge all pages into a single string.
 *
 * @param file - The PDF file to read.
 * @returns The extracted text with pages merged.
 */
export async function extractPdfText(file: File): Promise<string> {
    const bytes = new Uint8Array(await file.arrayBuffer())
    const documentProxy = await getDocumentProxy(bytes)
    const extracted = await extractText(documentProxy, { mergePages: true })
    // The `as string` cast narrows the library's declared result type for the merged-pages call.
    return extracted.text as string
}
/**
 * Check if a file is a PDF.
 *
 * A file counts as a PDF when its MIME type is "application/pdf" or its name
 * ends with ".pdf". The extension comparison is case-insensitive (matching
 * how isTextFile treats names), so "REPORT.PDF" is recognised even when
 * `file.type` does not identify it as a PDF.
 *
 * @param file - The file to inspect; only `type` and `name` are read.
 * @returns true when the file looks like a PDF, false otherwise.
 */
export function isPdfFile(file: File): boolean {
    return (
        file.type === "application/pdf" ||
        file.name.toLowerCase().endsWith(".pdf")
    )
}
/**
 * Decide whether a file should be handled as a text file.
 *
 * True when the MIME type starts with "text/", is exactly "application/json",
 * or the lower-cased file name ends with one of TEXT_EXTENSIONS.
 *
 * @param file - The file to inspect; only `type` and `name` are read.
 * @returns true for recognised text files, false otherwise.
 */
export function isTextFile(file: File): boolean {
    const lowerName = file.name.toLowerCase()
    const mimeType = file.type

    if (mimeType.startsWith("text/") || mimeType === "application/json") {
        return true
    }

    // Fall back to the extension when the MIME type did not match.
    for (const extension of TEXT_EXTENSIONS) {
        if (lowerName.endsWith(extension)) {
            return true
        }
    }
    return false
}
/**
 * Read a text file and return its content as a string.
 *
 * @param file - The file to read.
 * @returns The string produced by `file.text()`.
 */
export async function extractTextFileContent(file: File): Promise<string> {
    const content = await file.text()
    return content
}

View File

@@ -10,10 +10,10 @@
export const DEFAULT_SYSTEM_PROMPT = ` export const DEFAULT_SYSTEM_PROMPT = `
You are an expert diagram creation assistant specializing in draw.io XML generation. You are an expert diagram creation assistant specializing in draw.io XML generation.
Your primary function is chat with user and crafting clear, well-organized visual diagrams through precise XML specifications. Your primary function is chat with user and crafting clear, well-organized visual diagrams through precise XML specifications.
You can see the image that user uploaded. You can see images that users upload, and you can read the text content extracted from PDF documents they upload.
When you are asked to create a diagram, you must first tell user you plan in text first. Plan the layout and structure that can avoid object overlapping or edge cross the objects. When you are asked to create a diagram, briefly describe your plan about the layout and structure to avoid object overlapping or edge cross the objects. (2-3 sentences max), then use display_diagram tool to generate the XML.
Then use display_diagram tool to generate the full draw.io XML for the entire diagram. After generating or editing a diagram, you don't need to say anything. The user can see the diagram - no need to describe it.
## App Context ## App Context
You are an AI agent (powered by {{MODEL_NAME}}) inside a web app. The interface has: You are an AI agent (powered by {{MODEL_NAME}}) inside a web app. The interface has:
@@ -25,7 +25,7 @@ You can read and modify diagrams by generating draw.io XML code through tool cal
## App Features ## App Features
1. **Diagram History** (clock icon, bottom-left of chat input): The app automatically saves a snapshot before each AI edit. Users can view the history panel and restore any previous version. Feel free to make changes - nothing is permanently lost. 1. **Diagram History** (clock icon, bottom-left of chat input): The app automatically saves a snapshot before each AI edit. Users can view the history panel and restore any previous version. Feel free to make changes - nothing is permanently lost.
2. **Theme Toggle** (palette icon, bottom-left of chat input): Users can switch between minimal UI and sketch-style UI for the draw.io editor. 2. **Theme Toggle** (palette icon, bottom-left of chat input): Users can switch between minimal UI and sketch-style UI for the draw.io editor.
3. **Image Upload** (paperclip icon, bottom-left of chat input): Users can upload images for you to analyze and replicate as diagrams. 3. **Image/PDF Upload** (paperclip icon, bottom-left of chat input): Users can upload images or PDF documents for you to analyze and generate diagrams from.
4. **Export** (via draw.io toolbar): Users can save diagrams as .drawio, .svg, or .png files. 4. **Export** (via draw.io toolbar): Users can save diagrams as .drawio, .svg, or .png files.
5. **Clear Chat** (trash icon, bottom-right of chat input): Clears the conversation and resets the diagram. 5. **Clear Chat** (trash icon, bottom-right of chat input): Clears the conversation and resets the diagram.

15
package-lock.json generated
View File

@@ -54,6 +54,7 @@
"sonner": "^2.0.7", "sonner": "^2.0.7",
"tailwind-merge": "^3.0.2", "tailwind-merge": "^3.0.2",
"tailwindcss-animate": "^1.0.7", "tailwindcss-animate": "^1.0.7",
"unpdf": "^1.4.0",
"zod": "^4.1.12" "zod": "^4.1.12"
}, },
"devDependencies": { "devDependencies": {
@@ -12514,6 +12515,20 @@
"url": "https://opencollective.com/unified" "url": "https://opencollective.com/unified"
} }
}, },
"node_modules/unpdf": {
"version": "1.4.0",
"resolved": "https://registry.npmjs.org/unpdf/-/unpdf-1.4.0.tgz",
"integrity": "sha512-TahIk0xdH/4jh/MxfclzU79g40OyxtP00VnEUZdEkJoYtXAHWLiir6t3FC6z3vDqQTzc2ZHcla6uEiVTNjejuA==",
"license": "MIT",
"peerDependencies": {
"@napi-rs/canvas": "^0.1.69"
},
"peerDependenciesMeta": {
"@napi-rs/canvas": {
"optional": true
}
}
},
"node_modules/unrs-resolver": { "node_modules/unrs-resolver": {
"version": "1.11.1", "version": "1.11.1",
"resolved": "https://registry.npmjs.org/unrs-resolver/-/unrs-resolver-1.11.1.tgz", "resolved": "https://registry.npmjs.org/unrs-resolver/-/unrs-resolver-1.11.1.tgz",

View File

@@ -58,6 +58,7 @@
"sonner": "^2.0.7", "sonner": "^2.0.7",
"tailwind-merge": "^3.0.2", "tailwind-merge": "^3.0.2",
"tailwindcss-animate": "^1.0.7", "tailwindcss-animate": "^1.0.7",
"unpdf": "^1.4.0",
"zod": "^4.1.12" "zod": "^4.1.12"
}, },
"lint-staged": { "lint-staged": {

File diff suppressed because one or more lines are too long