feat: make PDF/text extraction char limit configurable via env (#214)

Add the NEXT_PUBLIC_MAX_EXTRACTED_CHARS environment variable to configure
the maximum number of characters extracted from PDF and text files.
Defaults to 150000 (150k chars) if the variable is unset or not a valid number.
This commit is contained in:
Dayuan Jiang
2025-12-11 14:14:31 +09:00
committed by GitHub
parent ee514efa9e
commit 8b9336466f
2 changed files with 6 additions and 2 deletions

View File

@@ -1,7 +1,10 @@
import { extractText, getDocumentProxy } from "unpdf"
// Maximum characters allowed for extracted text
export const MAX_EXTRACTED_CHARS = 150000 // 150k chars
// Maximum characters allowed for extracted text (configurable via env)
const DEFAULT_MAX_EXTRACTED_CHARS = 150000 // 150k chars

/**
 * Turn the raw environment value into a usable character limit.
 *
 * @param raw - Raw string from the environment, or `undefined` when unset.
 * @returns A positive integer limit. Falls back to
 *   `DEFAULT_MAX_EXTRACTED_CHARS` when `raw` is unset, empty, non-numeric,
 *   non-finite, or below 1; fractional values are rounded down.
 */
function parseMaxExtractedChars(raw: string | undefined): number {
    const parsed = Number(raw)
    // Number(undefined) is NaN and Number("") is 0; both must yield the
    // default, as must negatives and Infinity, which are meaningless limits.
    if (!Number.isFinite(parsed) || parsed < 1) {
        return DEFAULT_MAX_EXTRACTED_CHARS
    }
    // A character count has to be a whole number.
    return Math.floor(parsed)
}

// Keep the literal `process.env.NEXT_PUBLIC_...` member access here:
// Next.js substitutes NEXT_PUBLIC_ variables at build time only when they are
// referenced statically like this.
export const MAX_EXTRACTED_CHARS = parseMaxExtractedChars(
    process.env.NEXT_PUBLIC_MAX_EXTRACTED_CHARS,
)
// Text file extensions we support
const TEXT_EXTENSIONS = [