feat: add PDF and text file upload support (#205)

- Add client-side PDF text extraction using unpdf library
- Support text files (.txt, .md, .json, .csv, .py, .js, .ts, etc.)
- Add file preview with character count for PDF/text files
- Add 150k character limit for extracted content
- Highlight Paper to Diagram example with NEW badge
- Fix React hydration error by adding explicit IDs to ResizablePanelGroup
- Remove code duplication by centralizing file utilities in pdf-utils.ts
This commit is contained in:
Dayuan Jiang
2025-12-10 21:32:35 +09:00
committed by GitHub
parent 43e5993f47
commit d2ba133eaf
14 changed files with 940 additions and 57 deletions

View File

@@ -1,28 +1,52 @@
"use client"
import { Cloud, GitBranch, Palette, Zap } from "lucide-react"
import { Cloud, FileText, GitBranch, Palette, Zap } from "lucide-react"
interface ExampleCardProps {
icon: React.ReactNode
title: string
description: string
onClick: () => void
isNew?: boolean
}
function ExampleCard({ icon, title, description, onClick }: ExampleCardProps) {
function ExampleCard({
icon,
title,
description,
onClick,
isNew,
}: ExampleCardProps) {
return (
<button
onClick={onClick}
className="group w-full text-left p-4 rounded-xl border border-border/60 bg-card hover:bg-accent/50 hover:border-primary/30 transition-all duration-200 hover:shadow-sm"
className={`group w-full text-left p-4 rounded-xl border bg-card hover:bg-accent/50 hover:border-primary/30 transition-all duration-200 hover:shadow-sm ${
isNew
? "border-primary/40 ring-1 ring-primary/20"
: "border-border/60"
}`}
>
<div className="flex items-start gap-3">
<div className="w-9 h-9 rounded-lg bg-primary/10 flex items-center justify-center shrink-0 group-hover:bg-primary/15 transition-colors">
<div
className={`w-9 h-9 rounded-lg flex items-center justify-center shrink-0 transition-colors ${
isNew
? "bg-primary/20 group-hover:bg-primary/25"
: "bg-primary/10 group-hover:bg-primary/15"
}`}
>
{icon}
</div>
<div className="min-w-0">
<h3 className="text-sm font-medium text-foreground group-hover:text-primary transition-colors">
{title}
</h3>
<div className="flex items-center gap-2">
<h3 className="text-sm font-medium text-foreground group-hover:text-primary transition-colors">
{title}
</h3>
{isNew && (
<span className="px-1.5 py-0.5 text-[10px] font-semibold bg-primary text-primary-foreground rounded">
NEW
</span>
)}
</div>
<p className="text-xs text-muted-foreground mt-0.5 line-clamp-2">
{description}
</p>
@@ -67,6 +91,21 @@ export default function ExamplePanel({
}
}
const handlePdfExample = async () => {
setInput("Summarize this paper as a diagram")
try {
const response = await fetch("/chain-of-thought.txt")
const blob = await response.blob()
const file = new File([blob], "chain-of-thought.txt", {
type: "text/plain",
})
setFiles([file])
} catch (error) {
console.error("Error loading text file:", error)
}
}
return (
<div className="py-6 px-2 animate-fade-in">
{/* Welcome section */}
@@ -87,6 +126,14 @@ export default function ExamplePanel({
</p>
<div className="grid gap-2">
<ExampleCard
icon={<FileText className="w-4 h-4 text-primary" />}
title="Paper to Diagram"
description="Upload .pdf, .txt, .md, .json, .csv, .py, .js, .ts and more"
onClick={handlePdfExample}
isNew
/>
<ExampleCard
icon={<Zap className="w-4 h-4 text-primary" />}
title="Animated Diagram"

View File

@@ -19,11 +19,16 @@ import { SaveDialog } from "@/components/save-dialog"
import { Button } from "@/components/ui/button"
import { Textarea } from "@/components/ui/textarea"
import { useDiagram } from "@/contexts/diagram-context"
import { isPdfFile, isTextFile } from "@/lib/pdf-utils"
import { FilePreviewList } from "./file-preview-list"
const MAX_FILE_SIZE = 2 * 1024 * 1024 // 2MB
const MAX_IMAGE_SIZE = 2 * 1024 * 1024 // 2MB
const MAX_FILES = 5
function isValidFileType(file: File): boolean {
return file.type.startsWith("image/") || isPdfFile(file) || isTextFile(file)
}
function formatFileSize(bytes: number): string {
const mb = bytes / 1024 / 1024
if (mb < 0.01) return `${(bytes / 1024).toFixed(0)}KB`
@@ -63,9 +68,16 @@ function validateFiles(
errors.push(`Only ${availableSlots} more file(s) allowed`)
break
}
if (file.size > MAX_FILE_SIZE) {
if (!isValidFileType(file)) {
errors.push(`"${file.name}" is not a supported file type`)
continue
}
// Only check size for images (PDFs/text files are extracted client-side, so file size doesn't matter)
const isExtractedFile = isPdfFile(file) || isTextFile(file)
if (!isExtractedFile && file.size > MAX_IMAGE_SIZE) {
const maxSizeMB = MAX_IMAGE_SIZE / 1024 / 1024
errors.push(
`"${file.name}" is ${formatFileSize(file.size)} (exceeds 2MB)`,
`"${file.name}" is ${formatFileSize(file.size)} (exceeds ${maxSizeMB}MB)`,
)
} else {
validFiles.push(file)
@@ -109,6 +121,10 @@ interface ChatInputProps {
onClearChat: () => void
files?: File[]
onFileChange?: (files: File[]) => void
pdfData?: Map<
File,
{ text: string; charCount: number; isExtracting: boolean }
>
showHistory?: boolean
onToggleHistory?: (show: boolean) => void
sessionId?: string
@@ -123,6 +139,7 @@ export function ChatInput({
onClearChat,
files = [],
onFileChange = () => {},
pdfData = new Map(),
showHistory = false,
onToggleHistory = () => {},
sessionId,
@@ -245,11 +262,14 @@ export function ChatInput({
if (isDisabled) return
const droppedFiles = e.dataTransfer.files
const imageFiles = Array.from(droppedFiles).filter((file) =>
file.type.startsWith("image/"),
const supportedFiles = Array.from(droppedFiles).filter((file) =>
isValidFileType(file),
)
const { validFiles, errors } = validateFiles(imageFiles, files.length)
const { validFiles, errors } = validateFiles(
supportedFiles,
files.length,
)
showValidationErrors(errors)
if (validFiles.length > 0) {
onFileChange([...files, ...validFiles])
@@ -279,6 +299,7 @@ export function ChatInput({
<FilePreviewList
files={files}
onRemoveFile={handleRemoveFile}
pdfData={pdfData}
/>
</div>
)}
@@ -291,7 +312,7 @@ export function ChatInput({
onChange={handleChange}
onKeyDown={handleKeyDown}
onPaste={handlePaste}
placeholder="Describe your diagram or paste an image..."
placeholder="Describe your diagram or upload a file..."
disabled={isDisabled}
aria-label="Chat input"
className="min-h-[60px] max-h-[200px] resize-none border-0 bg-transparent px-4 py-3 text-sm focus-visible:ring-0 focus-visible:ring-offset-0 placeholder:text-muted-foreground/60"
@@ -367,7 +388,7 @@ export function ChatInput({
size="sm"
onClick={triggerFileInput}
disabled={isDisabled}
tooltipContent="Upload image"
tooltipContent="Upload file (image, PDF, text)"
className="h-8 w-8 p-0 text-muted-foreground hover:text-foreground"
>
<ImageIcon className="h-4 w-4" />
@@ -378,7 +399,7 @@ export function ChatInput({
ref={fileInputRef}
className="hidden"
onChange={handleFileChange}
accept="image/*"
accept="image/*,.pdf,application/pdf,text/*,.md,.markdown,.json,.csv,.xml,.yaml,.yml,.toml"
multiple
disabled={isDisabled}
/>

View File

@@ -8,6 +8,8 @@ import {
ChevronUp,
Copy,
Cpu,
FileCode,
FileText,
Minus,
Pencil,
Plus,
@@ -89,6 +91,59 @@ function EditDiffDisplay({ edits }: { edits: EditPair[] }) {
import { useDiagram } from "@/contexts/diagram-context"
// Helper to split text content into regular text and file sections (PDF or text files)
interface TextSection {
type: "text" | "file"
content: string
filename?: string
charCount?: number
fileType?: "pdf" | "text"
}
function splitTextIntoFileSections(text: string): TextSection[] {
const sections: TextSection[] = []
// Match [PDF: filename] or [File: filename] patterns
const filePattern =
/\[(PDF|File):\s*([^\]]+)\]\n([\s\S]*?)(?=\n\n\[(PDF|File):|$)/g
let lastIndex = 0
let match
while ((match = filePattern.exec(text)) !== null) {
// Add text before this file section
const beforeText = text.slice(lastIndex, match.index).trim()
if (beforeText) {
sections.push({ type: "text", content: beforeText })
}
// Add file section
const fileType = match[1].toLowerCase() === "pdf" ? "pdf" : "text"
const filename = match[2].trim()
const fileContent = match[3].trim()
sections.push({
type: "file",
content: fileContent,
filename,
charCount: fileContent.length,
fileType,
})
lastIndex = match.index + match[0].length
}
// Add remaining text after last file section
const remainingText = text.slice(lastIndex).trim()
if (remainingText) {
sections.push({ type: "text", content: remainingText })
}
// If no file sections found, return original text
if (sections.length === 0) {
sections.push({ type: "text", content: text })
}
return sections
}
const getMessageTextContent = (message: UIMessage): string => {
if (!message.parts) return ""
return message.parts
@@ -97,6 +152,14 @@ const getMessageTextContent = (message: UIMessage): string => {
.join("\n")
}
// Get only the user's original text, excluding appended file content
const getUserOriginalText = (message: UIMessage): string => {
const fullText = getMessageTextContent(message)
// Strip out [PDF: ...] and [File: ...] sections that were appended
const filePattern = /\n\n\[(PDF|File):\s*[^\]]+\]\n[\s\S]*$/
return fullText.replace(filePattern, "").trim()
}
interface ChatMessageDisplayProps {
messages: UIMessage[]
setInput: (input: string) => void
@@ -131,6 +194,10 @@ export function ChatMessageDisplay({
)
const editTextareaRef = useRef<HTMLTextAreaElement>(null)
const [editText, setEditText] = useState<string>("")
// Track which PDF sections are expanded (key: messageId-sectionIndex)
const [expandedPdfSections, setExpandedPdfSections] = useState<
Record<string, boolean>
>({})
const copyMessageToClipboard = async (messageId: string, text: string) => {
try {
@@ -391,7 +458,9 @@ export function ChatMessageDisplay({
message.id,
)
setEditText(
userMessageText,
getUserOriginalText(
message,
),
)
}}
className="p-1.5 rounded-lg text-muted-foreground/60 hover:text-muted-foreground hover:bg-muted transition-colors"
@@ -607,7 +676,9 @@ export function ChatMessageDisplay({
message.id,
)
setEditText(
userMessageText,
getUserOriginalText(
message,
),
)
}
}}
@@ -627,7 +698,9 @@ export function ChatMessageDisplay({
message.id,
)
setEditText(
userMessageText,
getUserOriginalText(
message,
),
)
}
}}
@@ -649,26 +722,126 @@ export function ChatMessageDisplay({
part.type ===
"text"
) {
const textContent =
(
part as {
text: string
}
)
.text
const sections =
splitTextIntoFileSections(
textContent,
)
return (
<div
key={`${message.id}-text-${group.startIndex}-${partIndex}`}
className={`prose prose-sm max-w-none break-words [&>*:first-child]:mt-0 [&>*:last-child]:mb-0 ${
message.role ===
"user"
? "[&_*]:!text-primary-foreground prose-code:bg-white/20"
: "dark:prose-invert"
}`}
className="space-y-2"
>
<ReactMarkdown>
{
(
part as {
text: string
}
{sections.map(
(
section,
sectionIndex,
) => {
if (
section.type ===
"file"
) {
const pdfKey = `${message.id}-file-${partIndex}-${sectionIndex}`
const isExpanded =
expandedPdfSections[
pdfKey
] ??
false
const charDisplay =
section.charCount &&
section.charCount >=
1000
? `${(section.charCount / 1000).toFixed(1)}k`
: section.charCount
return (
<div
key={
pdfKey
}
className="rounded-lg border border-border/60 bg-muted/30 overflow-hidden"
>
<button
type="button"
onClick={(
e,
) => {
e.stopPropagation()
setExpandedPdfSections(
(
prev,
) => ({
...prev,
[pdfKey]:
!isExpanded,
}),
)
}}
className="w-full flex items-center justify-between px-3 py-2 hover:bg-muted/50 transition-colors"
>
<div className="flex items-center gap-2">
{section.fileType ===
"pdf" ? (
<FileText className="h-4 w-4 text-red-500" />
) : (
<FileCode className="h-4 w-4 text-blue-500" />
)}
<span className="text-xs font-medium">
{
section.filename
}
</span>
<span className="text-[10px] text-muted-foreground">
(
{
charDisplay
}{" "}
chars)
</span>
</div>
{isExpanded ? (
<ChevronUp className="h-4 w-4 text-muted-foreground" />
) : (
<ChevronDown className="h-4 w-4 text-muted-foreground" />
)}
</button>
{isExpanded && (
<div className="px-3 py-2 border-t border-border/40 max-h-48 overflow-y-auto bg-muted/30">
<pre className="text-xs whitespace-pre-wrap text-foreground/80">
{
section.content
}
</pre>
</div>
)}
</div>
)
}
// Regular text section
return (
<div
key={`${message.id}-textsection-${partIndex}-${sectionIndex}`}
className={`prose prose-sm max-w-none break-words [&>*:first-child]:mt-0 [&>*:last-child]:mb-0 ${
message.role ===
"user"
? "[&_*]:!text-primary-foreground prose-code:bg-white/20"
: "dark:prose-invert"
}`}
>
<ReactMarkdown>
{
section.content
}
</ReactMarkdown>
</div>
)
.text
}
</ReactMarkdown>
},
)}
</div>
)
}

View File

@@ -44,6 +44,13 @@ const STORAGE_TPM_MINUTE_KEY = "next-ai-draw-io-tpm-minute"
import { useDiagram } from "@/contexts/diagram-context"
import { findCachedResponse } from "@/lib/cached-responses"
import {
extractPdfText,
extractTextFileContent,
isPdfFile,
isTextFile,
MAX_EXTRACTED_CHARS,
} from "@/lib/pdf-utils"
import { formatXML, wrapWithMxFile } from "@/lib/utils"
import { ChatMessageDisplay } from "./chat-message-display"
@@ -105,6 +112,10 @@ export default function ChatPanel({
}
const [files, setFiles] = useState<File[]>([])
// Store extracted PDF text with extraction status
const [pdfData, setPdfData] = useState<
Map<File, { text: string; charCount: number; isExtracting: boolean }>
>(new Map())
const [showHistory, setShowHistory] = useState(false)
const [showSettingsDialog, setShowSettingsDialog] = useState(false)
const [, setAccessCodeRequired] = useState(false)
@@ -711,11 +722,28 @@ Please retry with an adjusted search pattern or use display_diagram if retries a
// Add user message and fake assistant response to messages
// The chat-message-display useEffect will handle displaying the diagram
const toolCallId = `cached-${Date.now()}`
// Build user message text including any file content
let userText = input
for (const file of files) {
if (isPdfFile(file)) {
const extracted = pdfData.get(file)
if (extracted?.text) {
userText += `\n\n[PDF: ${file.name}]\n${extracted.text}`
}
} else if (isTextFile(file)) {
const extracted = pdfData.get(file)
if (extracted?.text) {
userText += `\n\n[File: ${file.name}]\n${extracted.text}`
}
}
}
setMessages([
{
id: `user-${Date.now()}`,
role: "user" as const,
parts: [{ type: "text" as const, text: input }],
parts: [{ type: "text" as const, text: userText }],
},
{
id: `assistant-${Date.now()}`,
@@ -745,25 +773,48 @@ Please retry with an adjusted search pattern or use display_diagram if retries a
// This ensures edit_diagram has the correct XML before AI responds
chartXMLRef.current = chartXml
const parts: any[] = [{ type: "text", text: input }]
// Build user text by concatenating input with pre-extracted text
// (Backend only reads first text part, so we must combine them)
let userText = input
const parts: any[] = []
if (files.length > 0) {
for (const file of files) {
const reader = new FileReader()
const dataUrl = await new Promise<string>((resolve) => {
reader.onload = () =>
resolve(reader.result as string)
reader.readAsDataURL(file)
})
if (isPdfFile(file)) {
// Use pre-extracted PDF text from pdfData
const extracted = pdfData.get(file)
if (extracted?.text) {
userText += `\n\n[PDF: ${file.name}]\n${extracted.text}`
}
} else if (isTextFile(file)) {
// Use pre-extracted text file content from pdfData
const extracted = pdfData.get(file)
if (extracted?.text) {
userText += `\n\n[File: ${file.name}]\n${extracted.text}`
}
} else {
// Handle as image
const reader = new FileReader()
const dataUrl = await new Promise<string>(
(resolve) => {
reader.onload = () =>
resolve(reader.result as string)
reader.readAsDataURL(file)
},
)
parts.push({
type: "file",
url: dataUrl,
mediaType: file.type,
})
parts.push({
type: "file",
url: dataUrl,
mediaType: file.type,
})
}
}
}
// Add the combined text as the first part
parts.unshift({ type: "text", text: userText })
// Get previous XML from the last snapshot (before this message)
const snapshotKeys = Array.from(
xmlSnapshotsRef.current.keys(),
@@ -843,8 +894,81 @@ Please retry with an adjusted search pattern or use display_diagram if retries a
setInput(e.target.value)
}
const handleFileChange = (newFiles: File[]) => {
const handleFileChange = async (newFiles: File[]) => {
setFiles(newFiles)
// Extract text immediately for new PDF/text files
for (const file of newFiles) {
const needsExtraction =
(isPdfFile(file) || isTextFile(file)) && !pdfData.has(file)
if (needsExtraction) {
// Mark as extracting
setPdfData((prev) => {
const next = new Map(prev)
next.set(file, {
text: "",
charCount: 0,
isExtracting: true,
})
return next
})
// Extract text asynchronously
try {
let text: string
if (isPdfFile(file)) {
text = await extractPdfText(file)
} else {
text = await extractTextFileContent(file)
}
// Check character limit
if (text.length > MAX_EXTRACTED_CHARS) {
const limitK = MAX_EXTRACTED_CHARS / 1000
toast.error(
`${file.name}: Content exceeds ${limitK}k character limit (${(text.length / 1000).toFixed(1)}k chars)`,
)
setPdfData((prev) => {
const next = new Map(prev)
next.delete(file)
return next
})
// Remove the file from the list
setFiles((prev) => prev.filter((f) => f !== file))
continue
}
setPdfData((prev) => {
const next = new Map(prev)
next.set(file, {
text,
charCount: text.length,
isExtracting: false,
})
return next
})
} catch (error) {
console.error("Failed to extract text:", error)
toast.error(`Failed to read file: ${file.name}`)
setPdfData((prev) => {
const next = new Map(prev)
next.delete(file)
return next
})
}
}
}
// Clean up pdfData for removed files
setPdfData((prev) => {
const next = new Map(prev)
for (const key of prev.keys()) {
if (!newFiles.includes(key)) {
next.delete(key)
}
}
return next
})
}
const handleRegenerate = async (messageIndex: number) => {
@@ -1228,6 +1352,7 @@ Please retry with an adjusted search pattern or use display_diagram if retries a
}}
files={files}
onFileChange={handleFileChange}
pdfData={pdfData}
showHistory={showHistory}
onToggleHistory={setShowHistory}
sessionId={sessionId}

View File

@@ -1,15 +1,31 @@
"use client"
import { X } from "lucide-react"
import { FileCode, FileText, Loader2, X } from "lucide-react"
import Image from "next/image"
import { useEffect, useRef, useState } from "react"
import { isPdfFile, isTextFile } from "@/lib/pdf-utils"
function formatCharCount(count: number): string {
if (count >= 1000) {
return `${(count / 1000).toFixed(1)}k`
}
return String(count)
}
interface FilePreviewListProps {
files: File[]
onRemoveFile: (fileToRemove: File) => void
pdfData?: Map<
File,
{ text: string; charCount: number; isExtracting: boolean }
>
}
export function FilePreviewList({ files, onRemoveFile }: FilePreviewListProps) {
export function FilePreviewList({
files,
onRemoveFile,
pdfData = new Map(),
}: FilePreviewListProps) {
const [selectedImage, setSelectedImage] = useState<string | null>(null)
const [imageUrls, setImageUrls] = useState<Map<File, string>>(new Map())
const imageUrlsRef = useRef<Map<File, string>>(new Map())
@@ -70,12 +86,19 @@ export function FilePreviewList({ files, onRemoveFile }: FilePreviewListProps) {
<div className="flex flex-wrap gap-2 mt-2 p-2 bg-muted/50 rounded-md">
{files.map((file, index) => {
const imageUrl = imageUrls.get(file) || null
const pdfInfo = pdfData.get(file)
return (
<div key={file.name + index} className="relative group">
<div
className="w-20 h-20 border rounded-md overflow-hidden bg-muted cursor-pointer"
className={`w-20 h-20 border rounded-md overflow-hidden bg-muted ${
file.type.startsWith("image/") && imageUrl
? "cursor-pointer"
: ""
}`}
onClick={() =>
imageUrl && setSelectedImage(imageUrl)
file.type.startsWith("image/") &&
imageUrl &&
setSelectedImage(imageUrl)
}
>
{file.type.startsWith("image/") && imageUrl ? (
@@ -87,6 +110,33 @@ export function FilePreviewList({ files, onRemoveFile }: FilePreviewListProps) {
className="object-cover w-full h-full"
unoptimized
/>
) : isPdfFile(file) || isTextFile(file) ? (
<div className="flex flex-col items-center justify-center h-full p-1">
{pdfInfo?.isExtracting ? (
<Loader2 className="h-6 w-6 text-blue-500 mb-1 animate-spin" />
) : isPdfFile(file) ? (
<FileText className="h-6 w-6 text-red-500 mb-1" />
) : (
<FileCode className="h-6 w-6 text-blue-500 mb-1" />
)}
<span className="text-xs text-center truncate w-full px-1">
{file.name.length > 10
? `${file.name.slice(0, 7)}...`
: file.name}
</span>
{pdfInfo?.isExtracting ? (
<span className="text-[10px] text-muted-foreground">
Reading...
</span>
) : pdfInfo?.charCount ? (
<span className="text-[10px] text-green-600 font-medium">
{formatCharCount(
pdfInfo.charCount,
)}{" "}
chars
</span>
) : null}
</div>
) : (
<div className="flex items-center justify-center h-full text-xs text-center p-1">
{file.name}