mirror of
https://github.com/DayuanJiang/next-ai-draw-io.git
synced 2026-01-03 06:42:27 +08:00
73 lines
1.5 KiB
TypeScript
73 lines
1.5 KiB
TypeScript
|
|
import { extractText, getDocumentProxy } from "unpdf"
|
||
|
|
|
||
|
|
// Upper bound on the number of characters of extracted text (150k)
export const MAX_EXTRACTED_CHARS = 150_000
|
||
|
|
|
||
|
|
// File-name suffixes (lower-case, with leading dot) that are treated as text
const TEXT_EXTENSIONS: string[] = [
    // Prose and data formats
    ".txt", ".md", ".markdown", ".json", ".csv", ".xml",
    // Web and JavaScript/TypeScript sources
    ".html", ".css", ".js", ".ts", ".jsx", ".tsx",
    // Other programming languages
    ".py", ".java", ".c", ".cpp", ".h", ".go", ".rs",
    // Configuration and log files
    ".yaml", ".yml", ".toml", ".ini", ".log",
    // Shell scripts
    ".sh", ".bash", ".zsh",
]
|
||
|
|
|
||
|
|
/**
 * Pull the text out of a PDF file using the unpdf library
 * (client-side extraction).
 *
 * The file is read fully into memory, opened with `getDocumentProxy`, and
 * passed to `extractText` with `mergePages: true`.
 *
 * @param file - The PDF file to read.
 * @returns The `text` value reported by unpdf, treated as a single string.
 */
export async function extractPdfText(file: File): Promise<string> {
    const bytes = new Uint8Array(await file.arrayBuffer())
    const pdfDocument = await getDocumentProxy(bytes)
    const extraction = await extractText(pdfDocument, { mergePages: true })
    // The result is asserted to be a string, as mergePages was requested.
    return extraction.text as string
}
|
||
|
|
|
||
|
|
/**
 * Check if a file is a PDF.
 *
 * A file counts as a PDF when its MIME type is "application/pdf" or its name
 * ends in ".pdf". The extension is compared case-insensitively, so a name
 * such as "REPORT.PDF" is recognised even when the MIME type does not
 * identify it. This matches isTextFile, which also lower-cases the name
 * before matching extensions.
 *
 * @param file - The file to inspect; only its `type` and `name` are read.
 * @returns `true` if the MIME type or the extension identifies a PDF.
 */
export function isPdfFile(file: File): boolean {
    if (file.type === "application/pdf") {
        return true
    }
    // Fall back to the extension, ignoring case.
    return file.name.toLowerCase().endsWith(".pdf")
}
|
||
|
|
|
||
|
|
/**
 * Decide whether a file should be handled as a text file.
 *
 * A file qualifies when its MIME type starts with "text/", when its MIME type
 * is exactly "application/json", or when its lower-cased name ends with one
 * of the entries in TEXT_EXTENSIONS.
 *
 * @param file - The file to inspect; only its `type` and `name` are read.
 * @returns `true` if any of the three checks matches.
 */
export function isTextFile(file: File): boolean {
    const lowerName = file.name.toLowerCase()
    const mimeType = file.type

    if (mimeType.startsWith("text/")) {
        return true
    }
    if (mimeType === "application/json") {
        return true
    }

    // Neither MIME check matched; fall back to the file extension.
    for (const extension of TEXT_EXTENSIONS) {
        if (lowerName.endsWith(extension)) {
            return true
        }
    }
    return false
}
|
||
|
|
|
||
|
|
/**
 * Read a text file and return its contents.
 *
 * @param file - The file to read.
 * @returns A promise for the string produced by `file.text()`.
 */
export async function extractTextFileContent(file: File): Promise<string> {
    const contents = await file.text()
    return contents
}
|