mirror of
https://github.com/DayuanJiang/next-ai-draw-io.git
synced 2026-01-04 07:12:28 +08:00
feat: add PDF and text file upload support (#205)
- Add client-side PDF text extraction using unpdf library
- Support text files (.txt, .md, .json, .csv, .py, .js, .ts, etc.)
- Add file preview with character count for PDF/text files
- Add 150k character limit for extracted content
- Highlight Paper to Diagram example with NEW badge
- Fix React hydration error by adding explicit IDs to ResizablePanelGroup
- Remove code duplication by centralizing file utilities in pdf-utils.ts
This commit is contained in:
72
lib/pdf-utils.ts
Normal file
72
lib/pdf-utils.ts
Normal file
@@ -0,0 +1,72 @@
|
||||
import { extractText, getDocumentProxy } from "unpdf"
|
||||
|
||||
/**
 * Upper bound (in characters) on text extracted from an uploaded file.
 * The limit is only declared here; nothing in this section enforces it.
 */
export const MAX_EXTRACTED_CHARS = 150_000
|
||||
|
||||
/**
 * File-name suffixes (lowercase, including the leading dot) that are treated
 * as plain text. Used for suffix matching against lowercased file names.
 */
const TEXT_EXTENSIONS: readonly string[] = [
    // Documents and data
    ".txt", ".md", ".markdown", ".json", ".csv", ".xml",
    // Web
    ".html", ".css", ".js", ".ts", ".jsx", ".tsx",
    // General-purpose source code
    ".py", ".java", ".c", ".cpp", ".h", ".go", ".rs",
    // Configuration and logs
    ".yaml", ".yml", ".toml", ".ini", ".log",
    // Shell scripts
    ".sh", ".bash", ".zsh",
]
|
||||
|
||||
/**
 * Extract the text content of a PDF file using the unpdf library.
 *
 * The file is read fully into memory, opened as a PDF document, and the text
 * of all pages is requested as one merged string (`mergePages: true`).
 *
 * @param file - The PDF file to read.
 * @returns The extracted text of the whole document.
 * @throws Propagates any error raised while reading the file or while
 *   opening/parsing the PDF.
 */
export async function extractPdfText(file: File): Promise<string> {
    const bytes = new Uint8Array(await file.arrayBuffer())
    const pdf = await getDocumentProxy(bytes)
    try {
        const { text } = await extractText(pdf, { mergePages: true })
        // With mergePages enabled the result is a single string rather than
        // a per-page array, hence the narrowing assertion.
        return text as string
    } finally {
        // Release the document (and its worker resources) whether or not
        // extraction succeeded, so repeated uploads do not leak.
        await pdf.destroy()
    }
}
|
||||
|
||||
/**
 * Check whether a file is a PDF.
 *
 * A file counts as a PDF when its MIME type is `application/pdf` or when its
 * name ends in `.pdf`. The extension comparison is case-insensitive, so
 * `REPORT.PDF` is recognised even when no MIME type is reported.
 *
 * @param file - The file to inspect.
 * @returns `true` if the file looks like a PDF, otherwise `false`.
 */
export function isPdfFile(file: File): boolean {
    if (file.type === "application/pdf") {
        return true
    }
    // Lowercase first so the extension check matches how text files are
    // detected and does not reject upper- or mixed-case extensions.
    return file.name.toLowerCase().endsWith(".pdf")
}
|
||||
|
||||
/**
 * Check whether a file should be handled as plain text.
 *
 * A file qualifies when its MIME type starts with `text/`, when its MIME type
 * is `application/json`, or when its lowercased name ends with one of the
 * suffixes in `TEXT_EXTENSIONS`.
 *
 * @param file - The file to inspect.
 * @returns `true` if any of the checks above matches, otherwise `false`.
 */
export function isTextFile(file: File): boolean {
    const lowerName = file.name.toLowerCase()
    const mime = file.type

    if (mime.startsWith("text/")) {
        return true
    }
    if (mime === "application/json") {
        return true
    }
    // Fall back to the extension when the MIME type is not conclusive.
    return TEXT_EXTENSIONS.some((suffix) => lowerName.endsWith(suffix))
}
|
||||
|
||||
/**
 * Read the full content of a text file as a string.
 *
 * Delegates to `file.text()`; no trimming or truncation is applied here.
 *
 * @param file - The text file to read.
 * @returns The file's content.
 */
export async function extractTextFileContent(file: File): Promise<string> {
    const contents = await file.text()
    return contents
}
|
||||
Reference in New Issue
Block a user