feat: add URL content extraction for AI diagram generation

This commit is contained in:
Biki Kalita
2026-01-04 22:36:46 +05:30
parent 3ce047f794
commit 64268b0fac
11 changed files with 3301 additions and 2037 deletions

134
app/api/parse-url/route.ts Normal file
View File

@@ -0,0 +1,134 @@
import { extract } from "@extractus/article-extractor"
import { NextResponse } from "next/server"
import TurndownService from "turndown"
const MAX_CONTENT_LENGTH = 150000 // Match PDF limit
// SSRF protection - block private/internal addresses
/**
 * SSRF guard: decides whether a user-supplied URL must be rejected because it
 * is malformed, uses a non-web scheme, or targets a loopback / private /
 * link-local / metadata address.
 *
 * The check is purely lexical (it inspects the parsed hostname only). It does
 * not resolve DNS, so a public-looking name that resolves to a private address
 * is NOT detected here.
 *
 * @param urlString - Raw URL string as received from the client.
 * @returns `true` when the URL must be blocked, `false` when it looks public.
 */
function isPrivateUrl(urlString: string): boolean {
    let parsed: URL
    try {
        parsed = new URL(urlString)
    } catch {
        return true // Invalid URL - block it
    }

    // Only plain web schemes are fetchable; this also guarantees the WHATWG
    // parser has normalised numeric hosts (e.g. "2130706433" -> "127.0.0.1").
    if (parsed.protocol !== "http:" && parsed.protocol !== "https:") {
        return true
    }

    let hostname = parsed.hostname.toLowerCase()
    // IPv6 literals are serialised with brackets ("[::1]"); strip them.
    if (hostname.startsWith("[") && hostname.endsWith("]")) {
        hostname = hostname.slice(1, -1)
    }
    // "localhost." is the same host as "localhost".
    hostname = hostname.replace(/\.+$/, "")
    if (hostname === "") return true

    // Block localhost and common internal hostnames
    if (
        hostname === "localhost" ||
        hostname === "metadata.google.internal" ||
        hostname.endsWith(".local") ||
        hostname.endsWith(".internal") ||
        hostname.endsWith(".localhost")
    ) {
        return true
    }

    // IPv4 literal (already canonical dotted-decimal at this point)
    const ipv4Match = hostname.match(
        /^(\d{1,3})\.(\d{1,3})\.(\d{1,3})\.(\d{1,3})$/,
    )
    if (ipv4Match) {
        const [, a, b] = ipv4Match.map(Number)
        return isNonPublicIpv4(a, b)
    }

    // IPv6 literal
    if (hostname.includes(":")) {
        if (hostname === "::" || hostname === "::1") return true // unspecified / loopback
        if (/^f[cd][0-9a-f]{2}:/.test(hostname)) return true // fc00::/7 (unique local)
        if (/^fe[89ab][0-9a-f]:/.test(hostname)) return true // fe80::/10 (link-local)
        // IPv4-mapped addresses are serialised as ::ffff:HHHH:HHHH
        const mapped = hostname.match(
            /^::ffff:([0-9a-f]{1,4}):([0-9a-f]{1,4})$/,
        )
        if (mapped) {
            const high = Number.parseInt(mapped[1], 16)
            return isNonPublicIpv4(high >> 8, high & 0xff)
        }
    }

    return false
}

/** Returns true when an IPv4 address, given its first two octets, is in a non-public range. */
function isNonPublicIpv4(a: number, b: number): boolean {
    if (a === 0) return true // 0.0.0.0/8 ("this host")
    if (a === 10) return true // 10.0.0.0/8
    if (a === 100 && b >= 64 && b <= 127) return true // 100.64.0.0/10 (shared address space)
    if (a === 127) return true // 127.0.0.0/8 (loopback)
    if (a === 169 && b === 254) return true // 169.254.0.0/16 (link-local, cloud metadata)
    if (a === 172 && b >= 16 && b <= 31) return true // 172.16.0.0/12
    if (a === 192 && b === 168) return true // 192.168.0.0/16
    if (a >= 224) return true // multicast, reserved and broadcast
    return false
}
/**
 * POST /api/parse-url
 *
 * Expects a JSON body `{ url: string }`, fetches the page, extracts the main
 * article and returns it as Markdown: `{ title, content, charCount }`.
 *
 * Responds with 400 for a malformed body, a missing/invalid/private URL,
 * un-extractable content or content over MAX_CONTENT_LENGTH, and with 500 for
 * any unexpected failure while fetching or converting.
 */
export async function POST(req: Request) {
    try {
        // A malformed body is a client error, not a server error.
        let body: unknown
        try {
            body = await req.json()
        } catch {
            return NextResponse.json(
                { error: "Invalid JSON body" },
                { status: 400 },
            )
        }

        // The body may legally be null, a number, an array, etc.
        const url =
            typeof body === "object" && body !== null
                ? (body as { url?: unknown }).url
                : undefined
        if (!url || typeof url !== "string") {
            return NextResponse.json(
                { error: "URL is required" },
                { status: 400 },
            )
        }

        // Validate URL format
        try {
            new URL(url)
        } catch {
            return NextResponse.json(
                { error: "Invalid URL format" },
                { status: 400 },
            )
        }

        // SSRF protection
        // NOTE(review): this validates only the initial URL; redirects followed
        // by the extractor are not re-checked here.
        if (isPrivateUrl(url)) {
            return NextResponse.json(
                { error: "Cannot access private/internal URLs" },
                { status: 400 },
            )
        }

        // Extract article content. The library signature is
        // extract(input, parserOptions, fetchOptions): request headers belong
        // in the third argument, not in the parser options.
        const article = await extract(
            url,
            {},
            {
                headers: {
                    "User-Agent": "Mozilla/5.0 (compatible; NextAIDrawio/1.0)",
                },
            },
        )
        if (!article || !article.content) {
            return NextResponse.json(
                { error: "Could not extract content from URL" },
                { status: 400 },
            )
        }

        // Convert HTML to Markdown
        const turndownService = new TurndownService({
            headingStyle: "atx",
            codeBlockStyle: "fenced",
        })
        // Remove unwanted elements before conversion
        turndownService.remove(["script", "style", "iframe", "noscript"])
        const markdown = turndownService.turndown(article.content)

        // Check content length
        if (markdown.length > MAX_CONTENT_LENGTH) {
            return NextResponse.json(
                {
                    error: `Content exceeds ${MAX_CONTENT_LENGTH / 1000}k character limit (${(markdown.length / 1000).toFixed(1)}k chars)`,
                },
                { status: 400 },
            )
        }

        return NextResponse.json({
            title: article.title || "Untitled",
            content: markdown,
            charCount: markdown.length,
        })
    } catch (error) {
        console.error("URL extraction error:", error)
        return NextResponse.json(
            { error: "Failed to fetch or parse URL content" },
            { status: 500 },
        )
    }
}

View File

@@ -4,6 +4,7 @@ import {
Download,
History,
Image as ImageIcon,
Link,
Loader2,
Send,
} from "lucide-react"
@@ -18,11 +19,13 @@ import { SaveDialog } from "@/components/save-dialog"
import { Button } from "@/components/ui/button"
import { Textarea } from "@/components/ui/textarea"
import { UrlInputDialog } from "@/components/url-input-dialog"
import { useDiagram } from "@/contexts/diagram-context"
import { useDictionary } from "@/hooks/use-dictionary"
import { formatMessage } from "@/lib/i18n/utils"
import { isPdfFile, isTextFile } from "@/lib/pdf-utils"
import type { FlattenedModel } from "@/lib/types/model-config"
import { extractUrlContent, type UrlData } from "@/lib/url-utils"
import { FilePreviewList } from "./file-preview-list"
const MAX_IMAGE_SIZE = 2 * 1024 * 1024 // 2MB
@@ -144,6 +147,8 @@ interface ChatInputProps {
File,
{ text: string; charCount: number; isExtracting: boolean }
>
urlData?: Map<string, UrlData>
onUrlChange?: (data: Map<string, UrlData>) => void
sessionId?: string
error?: Error | null
@@ -163,6 +168,8 @@ export function ChatInput({
files = [],
onFileChange = () => {},
pdfData = new Map(),
urlData,
onUrlChange,
sessionId,
error = null,
models = [],
@@ -183,6 +190,8 @@ export function ChatInput({
const fileInputRef = useRef<HTMLInputElement>(null)
const [isDragging, setIsDragging] = useState(false)
const [showHistory, setShowHistory] = useState(false)
const [showUrlDialog, setShowUrlDialog] = useState(false)
const [isExtractingUrl, setIsExtractingUrl] = useState(false)
// Allow retry when there's an error (even if status is still "streaming" or "submitted")
const isDisabled =
(status === "streaming" || status === "submitted") && !error
@@ -312,6 +321,44 @@ export function ChatInput({
}
}
/**
 * Extracts the content behind `url` and stores it in the URL attachment map.
 *
 * A placeholder entry (isExtracting: true) is published first so the preview
 * list can show progress; it is replaced by the extracted data on success and
 * rolled back on failure so no permanently "loading" tile is left behind.
 * Does nothing when the parent did not provide `onUrlChange`.
 */
const handleUrlExtract = async (url: string) => {
    if (!onUrlChange) return

    setIsExtractingUrl(true)
    // Snapshot taken before the placeholder is added; used for rollback.
    const previous = urlData
        ? new Map(urlData)
        : new Map<string, UrlData>()

    try {
        const pending = new Map(previous)
        pending.set(url, {
            url,
            title: url,
            content: "",
            charCount: 0,
            isExtracting: true,
        })
        onUrlChange(pending)

        const data = await extractUrlContent(url)

        const resolved = new Map(pending)
        resolved.set(url, data)
        onUrlChange(resolved)
        setShowUrlDialog(false)
    } catch (error) {
        // Drop the placeholder (or restore an earlier successful extraction
        // of the same URL) instead of leaving a spinner that never finishes.
        onUrlChange(previous)
        showErrorToast(
            <span className="text-muted-foreground">
                {error instanceof Error
                    ? error.message
                    : "Failed to extract URL content"}
            </span>,
        )
    } finally {
        setIsExtractingUrl(false)
    }
}
return (
<form
onSubmit={onSubmit}
@@ -324,13 +371,23 @@ export function ChatInput({
onDragLeave={handleDragLeave}
onDrop={handleDrop}
>
{/* File previews */}
{files.length > 0 && (
{/* File & URL previews */}
{(files.length > 0 || (urlData && urlData.size > 0)) && (
<div className="mb-3">
<FilePreviewList
files={files}
onRemoveFile={handleRemoveFile}
pdfData={pdfData}
urlData={urlData}
onRemoveUrl={
onUrlChange
? (url) => {
const next = new Map(urlData)
next.delete(url)
onUrlChange(next)
}
: undefined
}
/>
</div>
)}
@@ -385,6 +442,20 @@ export function ChatInput({
<ImageIcon className="h-4 w-4" />
</ButtonWithTooltip>
{onUrlChange && (
<ButtonWithTooltip
type="button"
variant="ghost"
size="sm"
onClick={() => setShowUrlDialog(true)}
disabled={isDisabled}
tooltipContent={dict.chat.ExtractURL}
className="h-8 w-8 p-0 text-muted-foreground hover:text-foreground"
>
<Link className="h-4 w-4" />
</ButtonWithTooltip>
)}
<input
type="file"
ref={fileInputRef}
@@ -443,6 +514,14 @@ export function ChatInput({
.toISOString()
.slice(0, 10)}`}
/>
{onUrlChange && (
<UrlInputDialog
open={showUrlDialog}
onOpenChange={setShowUrlDialog}
onSubmit={handleUrlExtract}
isExtracting={isExtractingUrl}
/>
)}
</form>
)
}

View File

@@ -34,6 +34,7 @@ import { findCachedResponse } from "@/lib/cached-responses"
import { formatMessage } from "@/lib/i18n/utils"
import { isPdfFile, isTextFile } from "@/lib/pdf-utils"
import { sanitizeMessages } from "@/lib/session-storage"
import type { UrlData } from "@/lib/url-utils"
import { type FileData, useFileProcessor } from "@/lib/use-file-processor"
import { useQuotaManager } from "@/lib/use-quota-manager"
import { cn, formatXML, isRealDiagram } from "@/lib/utils"
@@ -158,6 +159,7 @@ export default function ChatPanel({
// File processing using extracted hook
const { files, pdfData, handleFileChange, setFiles } = useFileProcessor()
const [urlData, setUrlData] = useState<Map<string, UrlData>>(new Map())
const [showSettingsDialog, setShowSettingsDialog] = useState(false)
const [showModelConfigDialog, setShowModelConfigDialog] = useState(false)
@@ -710,6 +712,8 @@ export default function ChatPanel({
input,
files,
pdfData,
undefined,
urlData,
)
setMessages([
@@ -735,6 +739,7 @@ export default function ChatPanel({
setInput("")
sessionStorage.removeItem(SESSION_STORAGE_INPUT_KEY)
setFiles([])
setUrlData(new Map())
return
}
}
@@ -755,6 +760,7 @@ export default function ChatPanel({
files,
pdfData,
parts,
urlData,
)
// Add the combined text as the first part
@@ -779,6 +785,7 @@ export default function ChatPanel({
setInput("")
sessionStorage.removeItem(SESSION_STORAGE_INPUT_KEY)
setFiles([])
setUrlData(new Map())
} catch (error) {
console.error("Error fetching chart data:", error)
}
@@ -854,6 +861,7 @@ export default function ChatPanel({
clearDiagram()
setDiagramHistory([])
handleFileChange([]) // Use handleFileChange to also clear pdfData
setUrlData(new Map())
const newSessionId = `session-${Date.now()}-${Math.random()
.toString(36)
.slice(2, 9)}`
@@ -972,6 +980,7 @@ export default function ChatPanel({
files: File[],
pdfData: Map<File, FileData>,
imageParts?: any[],
urlDataParam?: Map<string, UrlData>,
): Promise<string> => {
let userText = baseText
@@ -1002,6 +1011,14 @@ export default function ChatPanel({
}
}
if (urlDataParam) {
for (const [url, data] of urlDataParam) {
if (data.content) {
userText += `\n\n[URL: ${url}]\nTitle: ${data.title}\n\n${data.content}`
}
}
}
return userText
}
@@ -1262,6 +1279,8 @@ export default function ChatPanel({
files={files}
onFileChange={handleFileChange}
pdfData={pdfData}
urlData={urlData}
onUrlChange={setUrlData}
sessionId={sessionId}
error={error}
models={modelConfig.models}

View File

@@ -1,6 +1,6 @@
"use client"
import { FileCode, FileText, Loader2, X } from "lucide-react"
import { FileCode, FileText, Link, Loader2, X } from "lucide-react"
import Image from "next/image"
import { useEffect, useRef, useState } from "react"
import { useDictionary } from "@/hooks/use-dictionary"
@@ -20,12 +20,19 @@ interface FilePreviewListProps {
File,
{ text: string; charCount: number; isExtracting: boolean }
>
urlData?: Map<
string,
{ url: string; title: string; charCount: number; isExtracting: boolean }
>
onRemoveUrl?: (url: string) => void
}
export function FilePreviewList({
files,
onRemoveFile,
pdfData = new Map(),
urlData,
onRemoveUrl,
}: FilePreviewListProps) {
const dict = useDictionary()
const [selectedImage, setSelectedImage] = useState<string | null>(null)
@@ -77,7 +84,7 @@ export function FilePreviewList({
}
}, [imageUrls, selectedImage])
if (files.length === 0) return null
if (files.length === 0 && (!urlData || urlData.size === 0)) return null
return (
<>
@@ -152,6 +159,59 @@ export function FilePreviewList({
</div>
)
})}
{/* URL previews: one tile per extracted (or in-progress) URL */}
{urlData && urlData.size > 0 && (
    <div className="flex flex-wrap gap-2">
        {/* Map keys are unique, so the URL alone is a stable React key */}
        {Array.from(urlData.entries()).map(([url, data]) => (
            <div key={url} className="relative group">
                <div className="w-20 h-20 border rounded-md overflow-hidden bg-muted">
                    <div className="flex flex-col items-center justify-center h-full p-1">
                        {data.isExtracting ? (
                            <>
                                <Loader2 className="h-6 w-6 text-blue-500 mb-1 animate-spin" />
                                <span className="text-[10px] text-muted-foreground">
                                    {dict.file.reading}
                                </span>
                            </>
                        ) : (
                            <>
                                <Link className="h-6 w-6 text-blue-500 mb-1" />
                                <span className="text-xs text-center truncate w-full px-1">
                                    {data.title.length > 10
                                        ? `${data.title.slice(0, 7)}...`
                                        : data.title}
                                </span>
                                {/* Compare explicitly: `0 && …` would render a literal "0" */}
                                {data.charCount > 0 && (
                                    <span className="text-[10px] text-green-600 font-medium">
                                        {formatCharCount(data.charCount)}{" "}
                                        {dict.file.chars}
                                    </span>
                                )}
                            </>
                        )}
                    </div>
                </div>
                {onRemoveUrl && (
                    <button
                        type="button"
                        onClick={() => onRemoveUrl(url)}
                        className="absolute -top-2 -right-2 bg-destructive rounded-full p-1 opacity-0 group-hover:opacity-100 transition-opacity"
                        aria-label={dict.file.removeFile}
                    >
                        <X className="h-3 w-3" />
                    </button>
                )}
            </div>
        ))}
    </div>
)}
</div>
{/* Image Modal/Lightbox */}
{selectedImage && (

View File

@@ -0,0 +1,116 @@
"use client"
import { Link, Loader2 } from "lucide-react"
import { useState } from "react"
import { Button } from "@/components/ui/button"
import {
Dialog,
DialogContent,
DialogDescription,
DialogFooter,
DialogHeader,
DialogTitle,
} from "@/components/ui/dialog"
import { Input } from "@/components/ui/input"
import { useDictionary } from "@/hooks/use-dictionary"
/** Props accepted by UrlInputDialog. */
interface UrlInputDialogProps {
/** Whether the dialog is shown; forwarded to the underlying Dialog. */
open: boolean
/** Forwarded to the Dialog; also called with `false` by the Cancel button. */
onOpenChange: (open: boolean) => void
/** Called with the trimmed URL once client-side validation has passed. */
onSubmit: (url: string) => void
/** While true the input and both buttons are disabled and a spinner is shown. */
isExtracting: boolean
}
/**
 * Modal dialog that asks the user for a web page URL.
 *
 * The entered value is trimmed and validated locally (non-empty, parseable,
 * http/https only) before `onSubmit` is invoked; validation problems are shown
 * inline under the input. The dialog does not close itself on submit; the
 * parent controls visibility through `open` / `onOpenChange`.
 */
export function UrlInputDialog({
    open,
    onOpenChange,
    onSubmit,
    isExtracting,
}: UrlInputDialogProps) {
    const dict = useDictionary()
    const [url, setUrl] = useState("")
    const [error, setError] = useState("")

    const handleSubmit = () => {
        setError("")

        // Validate exactly the value that will be submitted.
        const trimmed = url.trim()
        if (!trimmed) {
            setError("Please enter a URL")
            return
        }

        let parsed: URL
        try {
            parsed = new URL(trimmed)
        } catch {
            setError("Invalid URL format")
            return
        }

        // `new URL` also accepts mailto:, javascript:, file: etc., which can
        // never be fetched as an article, so reject them before the round-trip.
        if (parsed.protocol !== "http:" && parsed.protocol !== "https:") {
            setError("Only http:// and https:// URLs are supported")
            return
        }

        onSubmit(trimmed)
    }

    const handleKeyDown = (e: React.KeyboardEvent<HTMLInputElement>) => {
        if (e.key === "Enter" && !isExtracting) {
            // Keep Enter from triggering any other default handling.
            e.preventDefault()
            handleSubmit()
        }
    }

    return (
        <Dialog open={open} onOpenChange={onOpenChange}>
            <DialogContent className="sm:max-w-md">
                <DialogHeader>
                    <DialogTitle>{dict.pdf.title}</DialogTitle>
                    <DialogDescription>
                        {dict.pdf.description}
                    </DialogDescription>
                </DialogHeader>
                <div className="space-y-4">
                    <div className="space-y-2">
                        <Input
                            value={url}
                            onChange={(e) => {
                                setUrl(e.target.value)
                                setError("")
                            }}
                            onKeyDown={handleKeyDown}
                            placeholder="https://example.com/article"
                            disabled={isExtracting}
                            autoFocus
                        />
                        {error && (
                            <p className="text-sm text-destructive">{error}</p>
                        )}
                    </div>
                </div>
                <DialogFooter>
                    <Button
                        variant="outline"
                        onClick={() => onOpenChange(false)}
                        disabled={isExtracting}
                    >
                        {dict.pdf.Cancel}
                    </Button>
                    <Button
                        onClick={handleSubmit}
                        disabled={isExtracting || !url.trim()}
                    >
                        {isExtracting ? (
                            <>
                                <Loader2 className="mr-2 h-4 w-4 animate-spin" />
                                {dict.pdf.Extracting}
                            </>
                        ) : (
                            <>
                                <Link className="mr-2 h-4 w-4" />
                                {dict.pdf.extract}
                            </>
                        )}
                    </Button>
                </DialogFooter>
            </DialogContent>
        </Dialog>
    )
}

View File

@@ -51,7 +51,8 @@
"badResponse": "Bad response",
"clickToEdit": "Click to edit",
"editMessage": "Edit message",
"saveAndSubmit": "Save & Submit"
"saveAndSubmit": "Save & Submit",
"ExtractURL": "Extract from URL"
},
"examples": {
"title": "Create diagrams with AI",
@@ -186,6 +187,13 @@
"chars": "chars",
"removeFile": "Remove file"
},
"pdf": {
"title": "Extract Content from URL",
"description": "Paste a URL to extract and analyze its content",
"Extracting": "Extracting...",
"extract": "Extract",
"Cancel": "Cancel"
},
"reasoning": {
"thinking": "Thinking...",
"thoughtFor": "Thought for {duration} seconds",

View File

@@ -51,7 +51,8 @@
"badResponse": "悪い応答",
"clickToEdit": "クリックして編集",
"editMessage": "メッセージを編集",
"saveAndSubmit": "保存して送信"
"saveAndSubmit": "保存して送信",
"ExtractURL": "URLから抽出"
},
"examples": {
"title": "AI でダイアグラムを作成",
@@ -186,6 +187,13 @@
"chars": "文字",
"removeFile": "ファイルを削除"
},
"pdf": {
"title": "URLからコンテンツを抽出",
"description": "URLを貼り付けてそのコンテンツを抽出および分析します",
"Extracting": "抽出中...",
"extract": "抽出",
"Cancel": "キャンセル"
},
"reasoning": {
"thinking": "考え中...",
"thoughtFor": "{duration} 秒考えました",

View File

@@ -51,7 +51,8 @@
"badResponse": "无帮助",
"clickToEdit": "点击编辑",
"editMessage": "编辑消息",
"saveAndSubmit": "保存并提交"
"saveAndSubmit": "保存并提交",
"ExtractURL": "从 URL 提取"
},
"examples": {
"title": "用 AI 创建图表",
@@ -186,6 +187,13 @@
"chars": "字符",
"removeFile": "移除文件"
},
"pdf": {
"title": "从 URL 提取内容",
"description": "粘贴 URL 以提取和分析其内容",
"Extracting": "提取中...",
"extract": "提取",
"Cancel": "取消"
},
"reasoning": {
"thinking": "思考中...",
"thoughtFor": "思考了 {duration} 秒",

29
lib/url-utils.ts Normal file
View File

@@ -0,0 +1,29 @@
/** Content extracted from a web page, as attached to a chat message. */
export interface UrlData {
    /** The URL the content was requested for. */
    url: string
    /** Page title reported by the extraction endpoint. */
    title: string
    /** Extracted page content (Markdown produced by /api/parse-url). */
    content: string
    /** Length of `content` in characters. */
    charCount: number
    /** True only for a placeholder entry whose extraction is still running. */
    isExtracting: boolean
}

/**
 * Asks the `/api/parse-url` endpoint to fetch `url` and extract its content.
 *
 * @param url - Address of the page to extract.
 * @returns The extracted data with `isExtracting` set to `false`.
 * @throws Error carrying the server-provided message (or a generic one) when
 *   the request fails or the response does not contain string content.
 */
export async function extractUrlContent(url: string): Promise<UrlData> {
    const fallbackMessage = "Failed to extract URL content"

    const response = await fetch("/api/parse-url", {
        method: "POST",
        headers: { "Content-Type": "application/json" },
        body: JSON.stringify({ url }),
    })

    // The body may be missing or not JSON (e.g. a proxy error page).
    const payload: unknown = await response.json().catch(() => null)
    const fields: Record<string, unknown> =
        typeof payload === "object" && payload !== null
            ? (payload as Record<string, unknown>)
            : {}

    if (!response.ok) {
        const message =
            typeof fields.error === "string" && fields.error
                ? fields.error
                : fallbackMessage
        throw new Error(message)
    }

    // Never hand an unvalidated payload to the UI: consumers read
    // `title.length` and concatenate `content` into the prompt.
    const content = fields.content
    if (typeof content !== "string") {
        throw new Error(fallbackMessage)
    }

    return {
        url,
        title:
            typeof fields.title === "string" && fields.title
                ? fields.title
                : url,
        content,
        charCount:
            typeof fields.charCount === "number"
                ? fields.charCount
                : content.length,
        isExtracting: false,
    }
}

4860
package-lock.json generated

File diff suppressed because it is too large Load Diff

View File

@@ -38,6 +38,7 @@
"@ai-sdk/react": "^3.0.1",
"@aws-sdk/client-dynamodb": "^3.957.0",
"@aws-sdk/credential-providers": "^3.943.0",
"@extractus/article-extractor": "^8.0.18",
"@formatjs/intl-localematcher": "^0.7.2",
"@langfuse/client": "^4.4.9",
"@langfuse/otel": "^4.4.4",
@@ -86,6 +87,7 @@
"sonner": "^2.0.7",
"tailwind-merge": "^3.0.2",
"tailwindcss-animate": "^1.0.7",
"turndown": "^7.2.0",
"unpdf": "^1.4.0",
"zod": "^4.1.12"
},
@@ -110,6 +112,7 @@
"@types/pako": "^2.0.3",
"@types/react": "^19",
"@types/react-dom": "^19",
"@types/turndown": "^5.0.6",
"concurrently": "^9.2.1",
"cross-env": "^10.1.0",
"electron": "^39.2.7",