feat: make PDF/text extraction char limit configurable via env (#214)

Add NEXT_PUBLIC_MAX_EXTRACTED_CHARS environment variable to allow
configuring the maximum characters extracted from PDF and text files.
Defaults to 150000 (150k chars) if not set.
This commit is contained in:
Dayuan Jiang
2025-12-11 14:14:31 +09:00
committed by GitHub
parent ee514efa9e
commit 8b9336466f
2 changed files with 6 additions and 2 deletions

View File

@@ -90,3 +90,4 @@ AI_MODEL=global.anthropic.claude-sonnet-4-5-20250929-v1:0
# Enable PDF file upload to extract text and generate diagrams
# Enabled by default. Set to "false" to disable.
# ENABLE_PDF_INPUT=true
# Max characters extracted from PDF/text files (default: 150000)
# NEXT_PUBLIC_MAX_EXTRACTED_CHARS=150000

View File

@@ -1,7 +1,10 @@
import { extractText, getDocumentProxy } from "unpdf"
// Maximum characters allowed for extracted text
export const MAX_EXTRACTED_CHARS = 150000 // 150k chars
// Maximum characters allowed for extracted text (configurable via env)
const DEFAULT_MAX_EXTRACTED_CHARS = 150000 // 150k chars

/**
 * Turn the raw NEXT_PUBLIC_MAX_EXTRACTED_CHARS value into a usable limit.
 *
 * Fractional values are rounded down. Anything that does not end up as a
 * positive safe integer (unset, empty, non-numeric, zero, negative,
 * Infinity) falls back to DEFAULT_MAX_EXTRACTED_CHARS, so a typo in the
 * environment can never produce a negative or unbounded limit.
 *
 * @param raw - The environment variable value, or undefined when unset.
 * @returns A positive integer character limit.
 */
function parseMaxExtractedChars(raw: string | undefined): number {
    // Number(undefined) is NaN and Number("") is 0; both are rejected below.
    const parsed = Math.floor(Number(raw))
    return Number.isSafeInteger(parsed) && parsed > 0
        ? parsed
        : DEFAULT_MAX_EXTRACTED_CHARS
}

// NOTE: Next.js inlines NEXT_PUBLIC_* variables into client bundles at build
// time, so changing this value requires a rebuild to take effect there.
export const MAX_EXTRACTED_CHARS = parseMaxExtractedChars(
    process.env.NEXT_PUBLIC_MAX_EXTRACTED_CHARS,
)
// Text file extensions we support
const TEXT_EXTENSIONS = [