refactor: consolidate stream smoothing into StreamProcessor with intelligent timing

- Move StreamSmoother functionality directly into StreamProcessor for better integration
- Create ContentExtractor strategy pattern for format-agnostic content extraction
- Implement intelligent dynamic delay calculation based on text length
- Support three text length tiers: short (char-by-char), medium (char-by-char), long (chunked)
- Remove manual chunk_size and delay_ms configuration - now auto-calculated
- Simplify admin UI to single toggle switch with auto timing adjustment
- Extract format detection logic to reusable content_extractors module
- Improve code maintainability with cleaner architecture
This commit is contained in:
fawney19
2025-12-19 09:46:22 +08:00
parent 85fafeacb8
commit 070121717d
7 changed files with 600 additions and 360 deletions

View File

@@ -470,68 +470,20 @@
title="流式输出"
description="配置流式响应的输出效果"
>
<div class="grid grid-cols-1 md:grid-cols-2 gap-6">
<div class="md:col-span-2">
<div class="flex items-center space-x-2">
<Checkbox
id="stream-smoothing-enabled"
v-model:checked="systemConfig.stream_smoothing_enabled"
/>
<div>
<Label
for="stream-smoothing-enabled"
class="cursor-pointer"
>
启用平滑输出
</Label>
<p class="text-xs text-muted-foreground">
将上游返回的大块内容拆分成小块模拟打字效果
</p>
</div>
</div>
</div>
<div class="flex items-center space-x-2">
<Checkbox
id="stream-smoothing-enabled"
v-model:checked="systemConfig.stream_smoothing_enabled"
/>
<div>
<Label
for="stream-smoothing-chunk-size"
class="block text-sm font-medium"
for="stream-smoothing-enabled"
class="cursor-pointer"
>
每块字符数
启用平滑输出
</Label>
<Input
id="stream-smoothing-chunk-size"
v-model.number="systemConfig.stream_smoothing_chunk_size"
type="number"
min="1"
max="100"
placeholder="5"
class="mt-1"
:disabled="!systemConfig.stream_smoothing_enabled"
/>
<p class="mt-1 text-xs text-muted-foreground">
每次输出的字符数量1-100
</p>
</div>
<div>
<Label
for="stream-smoothing-delay-ms"
class="block text-sm font-medium"
>
输出间隔 (毫秒)
</Label>
<Input
id="stream-smoothing-delay-ms"
v-model.number="systemConfig.stream_smoothing_delay_ms"
type="number"
min="1"
max="500"
placeholder="15"
class="mt-1"
:disabled="!systemConfig.stream_smoothing_enabled"
/>
<p class="mt-1 text-xs text-muted-foreground">
每块之间的延迟毫秒数1-500
<p class="text-xs text-muted-foreground">
自动根据文本长度调整输出速度短文本逐字符输出打字感更强长文本按块输出避免卡顿
</p>
</div>
</div>
@@ -886,8 +838,6 @@ interface SystemConfig {
audit_log_retention_days: number
// 流式输出
stream_smoothing_enabled: boolean
stream_smoothing_chunk_size: number
stream_smoothing_delay_ms: number
}
const loading = ref(false)
@@ -939,8 +889,6 @@ const systemConfig = ref<SystemConfig>({
audit_log_retention_days: 30,
// 流式输出
stream_smoothing_enabled: false,
stream_smoothing_chunk_size: 5,
stream_smoothing_delay_ms: 15,
})
// 计算属性KB 和 字节 之间的转换
@@ -999,8 +947,6 @@ async function loadSystemConfig() {
'audit_log_retention_days',
// 流式输出
'stream_smoothing_enabled',
'stream_smoothing_chunk_size',
'stream_smoothing_delay_ms',
]
for (const key of configs) {
@@ -1114,16 +1060,6 @@ async function saveSystemConfig() {
value: systemConfig.value.stream_smoothing_enabled,
description: '是否启用流式平滑输出'
},
{
key: 'stream_smoothing_chunk_size',
value: systemConfig.value.stream_smoothing_chunk_size,
description: '流式平滑输出每个小块的字符数'
},
{
key: 'stream_smoothing_delay_ms',
value: systemConfig.value.stream_smoothing_delay_ms,
description: '流式平滑输出每个小块之间的延迟毫秒数'
},
]
const promises = configItems.map(item =>

View File

@@ -298,16 +298,11 @@ class ChatHandlerBase(BaseMessageHandler, ABC):
def update_streaming_status() -> None:
self._update_usage_to_streaming_with_ctx(ctx)
# 从数据库批量读取流式平滑输出配置(单次查询)
smoothing_cfg = SystemConfigService.get_configs(
self.db,
["stream_smoothing_enabled", "stream_smoothing_chunk_size", "stream_smoothing_delay_ms"],
)
smoothing_config = StreamSmoothingConfig(
enabled=bool(smoothing_cfg.get("stream_smoothing_enabled", False)),
chunk_size=int(smoothing_cfg.get("stream_smoothing_chunk_size") or 5),
delay_ms=int(smoothing_cfg.get("stream_smoothing_delay_ms") or 15),
# 读取流式平滑输出开关
smoothing_enabled = bool(
SystemConfigService.get_config(self.db, "stream_smoothing_enabled", False)
)
smoothing_config = StreamSmoothingConfig(enabled=smoothing_enabled)
# 创建流处理器
stream_processor = StreamProcessor(

View File

@@ -34,7 +34,9 @@ from src.api.handlers.base.base_handler import (
from src.api.handlers.base.parsers import get_parser_for_format
from src.api.handlers.base.request_builder import PassthroughRequestBuilder
from src.api.handlers.base.stream_context import StreamContext
from src.api.handlers.base.stream_processor import create_smoothed_stream
from src.api.handlers.base.utils import build_sse_headers
from src.services.system.config import SystemConfigService
# 直接从具体模块导入,避免循环依赖
from src.api.handlers.base.response_parser import (
@@ -352,8 +354,17 @@ class CliMessageHandlerBase(BaseMessageHandler):
# 创建监控流
monitored_stream = self._create_monitored_stream(ctx, stream_generator)
# 创建平滑输出流(如果启用)
smoothing_enabled = bool(
SystemConfigService.get_config(self.db, "stream_smoothing_enabled", False)
)
if smoothing_enabled:
final_stream = create_smoothed_stream(monitored_stream)
else:
final_stream = monitored_stream
return StreamingResponse(
monitored_stream,
final_stream,
media_type="text/event-stream",
headers=build_sse_headers(),
background=background_tasks,

View File

@@ -0,0 +1,274 @@
"""
流式内容提取器 - 策略模式实现
为不同 API 格式OpenAI、Claude、Gemini提供内容提取和 chunk 构造的抽象。
StreamSmoother 使用这些提取器来处理不同格式的 SSE 事件。
"""
import copy
import json
from abc import ABC, abstractmethod
from typing import Optional
class ContentExtractor(ABC):
"""
流式内容提取器抽象基类
定义从 SSE 事件中提取文本内容和构造新 chunk 的接口。
每种 API 格式OpenAI、Claude、Gemini需要实现自己的提取器。
"""
@abstractmethod
def extract_content(self, data: dict) -> Optional[str]:
"""
从 SSE 数据中提取可拆分的文本内容
Args:
data: 解析后的 JSON 数据
Returns:
提取的文本内容,如果无法提取则返回 None
"""
pass
@abstractmethod
def create_chunk(
self,
original_data: dict,
new_content: str,
event_type: str = "",
is_first: bool = False,
) -> bytes:
"""
使用新内容构造 SSE chunk
Args:
original_data: 原始 JSON 数据
new_content: 新的文本内容
event_type: SSE 事件类型(某些格式需要)
is_first: 是否是第一个 chunk用于保留 role 等字段)
Returns:
编码后的 SSE 字节数据
"""
pass
class OpenAIContentExtractor(ContentExtractor):
"""
OpenAI 格式内容提取器
处理 OpenAI Chat Completions API 的流式响应格式:
- 数据结构: choices[0].delta.content
- 只在 delta 仅包含 role/content 时允许拆分,避免破坏 tool_calls 等结构
"""
def extract_content(self, data: dict) -> Optional[str]:
if not isinstance(data, dict):
return None
choices = data.get("choices")
if not isinstance(choices, list) or len(choices) != 1:
return None
first_choice = choices[0]
if not isinstance(first_choice, dict):
return None
delta = first_choice.get("delta")
if not isinstance(delta, dict):
return None
content = delta.get("content")
if not isinstance(content, str):
return None
# 只有 delta 仅包含 role/content 时才允许拆分
# 避免破坏 tool_calls、function_call 等复杂结构
allowed_keys = {"role", "content"}
if not all(key in allowed_keys for key in delta.keys()):
return None
return content
def create_chunk(
self,
original_data: dict,
new_content: str,
event_type: str = "",
is_first: bool = False,
) -> bytes:
new_data = original_data.copy()
if "choices" in new_data and new_data["choices"]:
new_choices = []
for choice in new_data["choices"]:
new_choice = choice.copy()
if "delta" in new_choice:
new_delta = {}
# 只有第一个 chunk 保留 role
if is_first and "role" in new_choice["delta"]:
new_delta["role"] = new_choice["delta"]["role"]
new_delta["content"] = new_content
new_choice["delta"] = new_delta
new_choices.append(new_choice)
new_data["choices"] = new_choices
return f"data: {json.dumps(new_data, ensure_ascii=False)}\n\n".encode("utf-8")
class ClaudeContentExtractor(ContentExtractor):
"""
Claude 格式内容提取器
处理 Claude Messages API 的流式响应格式:
- 事件类型: content_block_delta
- 数据结构: delta.type=text_delta, delta.text
"""
def extract_content(self, data: dict) -> Optional[str]:
if not isinstance(data, dict):
return None
# 检查事件类型
if data.get("type") != "content_block_delta":
return None
delta = data.get("delta", {})
if not isinstance(delta, dict):
return None
# 检查 delta 类型
if delta.get("type") != "text_delta":
return None
text = delta.get("text")
if not isinstance(text, str):
return None
return text
def create_chunk(
self,
original_data: dict,
new_content: str,
event_type: str = "",
is_first: bool = False,
) -> bytes:
new_data = original_data.copy()
if "delta" in new_data:
new_delta = new_data["delta"].copy()
new_delta["text"] = new_content
new_data["delta"] = new_delta
# Claude 格式需要 event: 前缀
event_name = event_type or "content_block_delta"
return f"event: {event_name}\ndata: {json.dumps(new_data, ensure_ascii=False)}\n\n".encode(
"utf-8"
)
class GeminiContentExtractor(ContentExtractor):
"""
Gemini 格式内容提取器
处理 Gemini API 的流式响应格式:
- 数据结构: candidates[0].content.parts[0].text
- 只有纯文本块才拆分
"""
def extract_content(self, data: dict) -> Optional[str]:
if not isinstance(data, dict):
return None
candidates = data.get("candidates")
if not isinstance(candidates, list) or len(candidates) != 1:
return None
first_candidate = candidates[0]
if not isinstance(first_candidate, dict):
return None
content = first_candidate.get("content", {})
if not isinstance(content, dict):
return None
parts = content.get("parts", [])
if not isinstance(parts, list) or len(parts) != 1:
return None
first_part = parts[0]
if not isinstance(first_part, dict):
return None
text = first_part.get("text")
# 只有纯文本块(只有 text 字段)才拆分
if not isinstance(text, str) or len(first_part) != 1:
return None
return text
def create_chunk(
self,
original_data: dict,
new_content: str,
event_type: str = "",
is_first: bool = False,
) -> bytes:
new_data = copy.deepcopy(original_data)
if "candidates" in new_data and new_data["candidates"]:
first_candidate = new_data["candidates"][0]
if "content" in first_candidate:
content = first_candidate["content"]
if "parts" in content and content["parts"]:
content["parts"][0]["text"] = new_content
return f"data: {json.dumps(new_data, ensure_ascii=False)}\n\n".encode("utf-8")
# 提取器注册表
_EXTRACTORS: dict[str, type[ContentExtractor]] = {
"openai": OpenAIContentExtractor,
"claude": ClaudeContentExtractor,
"gemini": GeminiContentExtractor,
}
def get_extractor(format_name: str) -> Optional[ContentExtractor]:
"""
根据格式名获取对应的内容提取器实例
Args:
format_name: 格式名称openai, claude, gemini
Returns:
对应的提取器实例,如果格式不支持则返回 None
"""
extractor_class = _EXTRACTORS.get(format_name.lower())
if extractor_class:
return extractor_class()
return None
def register_extractor(format_name: str, extractor_class: type[ContentExtractor]) -> None:
"""
注册新的内容提取器
Args:
format_name: 格式名称
extractor_class: 提取器类
"""
_EXTRACTORS[format_name.lower()] = extractor_class
def get_extractor_formats() -> list[str]:
"""
获取所有已注册的格式名称列表
Returns:
格式名称列表
"""
return list(_EXTRACTORS.keys())

View File

@@ -6,20 +6,26 @@
2. 响应流生成
3. 预读和嵌套错误检测
4. 客户端断开检测
5. 流式平滑输出
"""
import asyncio
import codecs
import json
import math
from dataclasses import dataclass
from typing import Any, AsyncGenerator, Callable, Optional
import httpx
from src.api.handlers.base.content_extractors import (
ContentExtractor,
get_extractor,
get_extractor_formats,
)
from src.api.handlers.base.parsers import get_parser_for_format
from src.api.handlers.base.response_parser import ResponseParser
from src.api.handlers.base.stream_context import StreamContext
from src.api.handlers.base.stream_smoother import StreamSmoother
from src.core.exceptions import EmbeddedErrorException
from src.core.logger import logger
from src.models.database import Provider, ProviderEndpoint
@@ -31,18 +37,23 @@ class StreamSmoothingConfig:
"""流式平滑输出配置"""
enabled: bool = False
chunk_size: int = 5
delay_ms: int = 15
class StreamProcessor:
"""
流式响应处理器
负责处理 SSE 流的解析、错误检测响应生成。
负责处理 SSE 流的解析、错误检测响应生成和平滑输出
从 ChatHandlerBase 中提取,使其职责更加单一。
"""
# 平滑输出参数
CHUNK_SIZE = 5 # 长文本每块字符数
MIN_DELAY_MS = 15 # 长文本延迟(毫秒)
MAX_DELAY_MS = 24 # 短文本延迟(毫秒)
SHORT_TEXT_THRESHOLD = 10 # 短文本阈值(逐字符输出)
LONG_TEXT_THRESHOLD = 50 # 长文本阈值(按块输出)
def __init__(
self,
request_id: str,
@@ -68,6 +79,9 @@ class StreamProcessor:
self.collect_text = collect_text
self.smoothing_config = smoothing_config or StreamSmoothingConfig()
# 内容提取器缓存
self._extractors: dict[str, ContentExtractor] = {}
def get_parser_for_provider(self, ctx: StreamContext) -> ResponseParser:
"""
获取 Provider 格式的解析器
@@ -390,7 +404,7 @@ class StreamProcessor:
sse_parser: SSE 解析器
line: 原始行数据
"""
# SSEEventParser 以去掉换行符的单行文本作为输入;这里统一剔除 CR/LF
# SSEEventParser 以"去掉换行符"的单行文本作为输入;这里统一剔除 CR/LF
# 避免把空行误判成 "\n" 并导致事件边界解析错误。
normalized_line = line.rstrip("\r\n")
events = sse_parser.feed_line(normalized_line)
@@ -489,12 +503,153 @@ class StreamProcessor:
return
# 启用平滑输出
smoother = StreamSmoother(
chunk_size=self.smoothing_config.chunk_size,
delay_ms=self.smoothing_config.delay_ms,
buffer = b""
is_first_content = True
async for chunk in stream_generator:
buffer += chunk
# 按双换行分割 SSE 事件(标准 SSE 格式)
while b"\n\n" in buffer:
event_block, buffer = buffer.split(b"\n\n", 1)
event_str = event_block.decode("utf-8", errors="replace")
# 解析事件块
lines = event_str.strip().split("\n")
data_str = None
event_type = ""
for line in lines:
line = line.rstrip("\r")
if line.startswith("event: "):
event_type = line[7:].strip()
elif line.startswith("data: "):
data_str = line[6:]
# 没有 data 行,直接透传
if data_str is None:
yield event_block + b"\n\n"
continue
# [DONE] 直接透传
if data_str.strip() == "[DONE]":
yield event_block + b"\n\n"
continue
# 尝试解析 JSON
try:
data = json.loads(data_str)
except json.JSONDecodeError:
yield event_block + b"\n\n"
continue
# 检测格式并提取内容
content, extractor = self._detect_format_and_extract(data)
# 只有内容长度大于 1 才需要平滑处理
if content and len(content) > 1 and extractor:
# 计算动态延迟
delay_seconds = self._calculate_delay(len(content))
# 智能拆分
content_chunks = self._split_content(content)
for i, sub_content in enumerate(content_chunks):
is_first = is_first_content and i == 0
# 使用提取器创建新 chunk
sse_chunk = extractor.create_chunk(
data,
sub_content,
event_type=event_type,
is_first=is_first,
)
yield sse_chunk
# 除了最后一个块,其他块之间加延迟
if i < len(content_chunks) - 1:
await asyncio.sleep(delay_seconds)
is_first_content = False
else:
# 不需要拆分,直接透传
yield event_block + b"\n\n"
if content:
is_first_content = False
# 处理剩余数据
if buffer:
yield buffer
def _get_extractor(self, format_name: str) -> Optional[ContentExtractor]:
    """Return the cached extractor for *format_name*, creating it on first use."""
    if format_name not in self._extractors:
        extractor = get_extractor(format_name)
        if extractor:
            self._extractors[format_name] = extractor
    # .get() rather than [] so unknown formats yield None instead of KeyError.
    return self._extractors.get(format_name)
def _detect_format_and_extract(
    self, data: dict
) -> tuple[Optional[str], Optional[ContentExtractor]]:
    """
    Detect the event format and extract its text content.

    Tries the extractor of every registered format in turn and returns the
    first successful extraction.

    Returns:
        (content, extractor): the extracted text and the extractor that
        produced it, or (None, None) when no format matched.
    """
    for format_name in get_extractor_formats():
        extractor = self._get_extractor(format_name)
        if extractor:
            content = extractor.extract_content(data)
            # An empty string is still a valid extraction, hence "is not None".
            if content is not None:
                return content, extractor
    return None, None
def _calculate_delay(self, text_length: int) -> float:
"""
根据文本长度计算动态延迟(秒)
短文本使用较大延迟(打字感更强),长文本使用较小延迟(避免卡顿)。
中间长度使用对数插值平滑过渡。
"""
if text_length <= self.SHORT_TEXT_THRESHOLD:
return self.MAX_DELAY_MS / 1000.0
if text_length >= self.LONG_TEXT_THRESHOLD:
return self.MIN_DELAY_MS / 1000.0
# 对数插值:平滑过渡
ratio = math.log(text_length / self.SHORT_TEXT_THRESHOLD) / math.log(
self.LONG_TEXT_THRESHOLD / self.SHORT_TEXT_THRESHOLD
)
async for chunk in smoother.smooth_stream(stream_generator):
yield chunk
delay_ms = self.MAX_DELAY_MS - ratio * (self.MAX_DELAY_MS - self.MIN_DELAY_MS)
return delay_ms / 1000.0
def _split_content(self, content: str) -> list[str]:
    """
    Split *content* according to its length.

    Short/medium text is split per character (more convincing typing
    effect); long text is split into CHUNK_SIZE pieces to avoid an
    excessive number of delays.
    """
    text_length = len(content)
    if text_length <= self.CHUNK_SIZE:
        return [content]
    # Long text: emit fixed-size chunks.
    if text_length >= self.LONG_TEXT_THRESHOLD:
        chunks = []
        for i in range(0, text_length, self.CHUNK_SIZE):
            chunks.append(content[i : i + self.CHUNK_SIZE])
        return chunks
    # Short/medium text: one character at a time.
    return list(content)
async def _cleanup(
self,
@@ -510,3 +665,137 @@ class StreamProcessor:
await http_client.aclose()
except Exception:
pass
async def create_smoothed_stream(
    stream_generator: AsyncGenerator[bytes, None],
) -> AsyncGenerator[bytes, None]:
    """
    Stand-alone smoothing wrapper around a raw SSE byte stream.

    Intended for callers (e.g. the CLI handler) that want smoothed output
    without building a full StreamProcessor instance.

    Args:
        stream_generator: The upstream byte stream to smooth.

    Yields:
        Smoothed SSE byte chunks.
    """
    smoother = _LightweightSmoother()
    async for piece in smoother.smooth(stream_generator):
        yield piece
class _LightweightSmoother:
    """
    Minimal smoothing engine for standalone use.

    Contains only the logic required for smoothed output, without any of
    StreamProcessor's other responsibilities.  The constants mirror
    StreamProcessor's smoothing parameters.
    """

    # Characters per chunk for long text.
    CHUNK_SIZE = 5
    # Per-chunk delay for long text (milliseconds).
    MIN_DELAY_MS = 15
    # Per-chunk delay for short text (milliseconds).
    MAX_DELAY_MS = 24
    # At or below this length, text is emitted character by character.
    SHORT_TEXT_THRESHOLD = 10
    # At or above this length, text is emitted in CHUNK_SIZE chunks.
    LONG_TEXT_THRESHOLD = 50

    def __init__(self) -> None:
        # Cache of extractor instances, keyed by format name.
        self._extractors: dict[str, ContentExtractor] = {}

    def _get_extractor(self, format_name: str) -> Optional[ContentExtractor]:
        """Return the cached extractor for *format_name*, creating it on demand."""
        if format_name not in self._extractors:
            extractor = get_extractor(format_name)
            if extractor:
                self._extractors[format_name] = extractor
        return self._extractors.get(format_name)

    def _detect_format_and_extract(
        self, data: dict
    ) -> tuple[Optional[str], Optional[ContentExtractor]]:
        """Try every registered format; return (content, extractor) of the first match."""
        for format_name in get_extractor_formats():
            extractor = self._get_extractor(format_name)
            if extractor:
                content = extractor.extract_content(data)
                if content is not None:
                    return content, extractor
        return None, None

    def _calculate_delay(self, text_length: int) -> float:
        """
        Compute the per-chunk delay (seconds) for a text of *text_length*.

        Short text gets the larger delay (stronger typing feel), long text
        the smaller one; lengths in between are log-interpolated.
        """
        if text_length <= self.SHORT_TEXT_THRESHOLD:
            return self.MAX_DELAY_MS / 1000.0
        if text_length >= self.LONG_TEXT_THRESHOLD:
            return self.MIN_DELAY_MS / 1000.0
        # Logarithmic interpolation between the two thresholds.
        ratio = math.log(text_length / self.SHORT_TEXT_THRESHOLD) / math.log(
            self.LONG_TEXT_THRESHOLD / self.SHORT_TEXT_THRESHOLD
        )
        return (self.MAX_DELAY_MS - ratio * (self.MAX_DELAY_MS - self.MIN_DELAY_MS)) / 1000.0

    def _split_content(self, content: str) -> list[str]:
        """Split per character for short/medium text, CHUNK_SIZE pieces for long text."""
        text_length = len(content)
        if text_length <= self.CHUNK_SIZE:
            return [content]
        if text_length >= self.LONG_TEXT_THRESHOLD:
            return [content[i : i + self.CHUNK_SIZE] for i in range(0, text_length, self.CHUNK_SIZE)]
        return list(content)

    async def smooth(
        self, stream_generator: AsyncGenerator[bytes, None]
    ) -> AsyncGenerator[bytes, None]:
        """
        Re-emit the stream with large text events split into small delayed chunks.

        Events without a data line, [DONE] markers, non-JSON payloads and
        unrecognised formats pass through unchanged.
        """
        buffer = b""
        is_first_content = True
        async for chunk in stream_generator:
            buffer += chunk
            # Standard SSE: events are separated by a blank line.
            # NOTE(review): a CRLF-delimited stream (\r\n\r\n) would not match
            # this split and would only be flushed at the end — confirm that
            # upstreams always use LF blank lines.
            while b"\n\n" in buffer:
                event_block, buffer = buffer.split(b"\n\n", 1)
                event_str = event_block.decode("utf-8", errors="replace")
                lines = event_str.strip().split("\n")
                data_str = None
                event_type = ""
                for line in lines:
                    line = line.rstrip("\r")
                    if line.startswith("event: "):
                        event_type = line[7:].strip()
                    elif line.startswith("data: "):
                        data_str = line[6:]
                # No data line: pass through untouched.
                if data_str is None:
                    yield event_block + b"\n\n"
                    continue
                # [DONE] sentinel: pass through untouched.
                if data_str.strip() == "[DONE]":
                    yield event_block + b"\n\n"
                    continue
                # Non-JSON payload: pass through untouched.
                try:
                    data = json.loads(data_str)
                except json.JSONDecodeError:
                    yield event_block + b"\n\n"
                    continue
                content, extractor = self._detect_format_and_extract(data)
                # Only multi-character text is worth smoothing.
                if content and len(content) > 1 and extractor:
                    delay_seconds = self._calculate_delay(len(content))
                    content_chunks = self._split_content(content)
                    for i, sub_content in enumerate(content_chunks):
                        is_first = is_first_content and i == 0
                        sse_chunk = extractor.create_chunk(
                            data, sub_content, event_type=event_type, is_first=is_first
                        )
                        yield sse_chunk
                        # Delay between chunks, but not after the last one.
                        if i < len(content_chunks) - 1:
                            await asyncio.sleep(delay_seconds)
                    is_first_content = False
                else:
                    yield event_block + b"\n\n"
                    if content:
                        is_first_content = False
        # Flush whatever is left (e.g. a final event without a trailing blank line).
        if buffer:
            yield buffer

View File

@@ -1,257 +0,0 @@
"""
流式平滑输出处理器
将上游返回的大 chunk 拆分成小块,模拟更流畅的打字效果。
支持 OpenAI、Claude、Gemini 格式的 SSE 事件。
"""
import asyncio
import copy
import json
from typing import AsyncGenerator, Optional, Tuple
class StreamSmoother:
"""
流式平滑输出处理器
将 SSE 事件中的大段 content 拆分成小块输出,
每块之间加入微小延迟,模拟打字效果。
"""
def __init__(
self,
chunk_size: int = 5,
delay_ms: int = 15,
):
"""
初始化平滑处理器
Args:
chunk_size: 每个小块的字符数
delay_ms: 每个小块之间的延迟毫秒数
"""
self.chunk_size = chunk_size
self.delay_ms = delay_ms
self.delay_seconds = self.delay_ms / 1000.0
def _split_content(self, content: str) -> list[str]:
"""
将内容按字符数拆分
对于中文等多字节字符,按字符(而非字节)拆分。
"""
if len(content) <= self.chunk_size:
return [content]
chunks = []
for i in range(0, len(content), self.chunk_size):
chunks.append(content[i : i + self.chunk_size])
return chunks
def _extract_content(self, data: dict) -> Tuple[Optional[str], str]:
"""
从 SSE 数据中提取可拆分的 content
Returns:
(content, format): content 为提取的文本format 为检测到的格式
format: "openai" | "claude" | "gemini" | "unknown"
"""
if not isinstance(data, dict):
return None, "unknown"
# OpenAI 格式: choices[0].delta.content
# 只在 delta 仅包含 role/content 时允许拆分,避免破坏 tool_calls 等结构
choices = data.get("choices")
if isinstance(choices, list) and len(choices) == 1:
first_choice = choices[0]
if isinstance(first_choice, dict):
delta = first_choice.get("delta")
if isinstance(delta, dict):
content = delta.get("content")
if isinstance(content, str):
allowed_keys = {"role", "content"}
if all(key in allowed_keys for key in delta.keys()):
return content, "openai"
# Claude 格式: type=content_block_delta, delta.type=text_delta, delta.text
if data.get("type") == "content_block_delta":
delta = data.get("delta", {})
if isinstance(delta, dict) and delta.get("type") == "text_delta":
text = delta.get("text")
if isinstance(text, str):
return text, "claude"
# Gemini 格式: candidates[0].content.parts[0].text
candidates = data.get("candidates")
if isinstance(candidates, list) and len(candidates) == 1:
first_candidate = candidates[0]
if isinstance(first_candidate, dict):
content = first_candidate.get("content", {})
if isinstance(content, dict):
parts = content.get("parts", [])
if isinstance(parts, list) and len(parts) == 1:
first_part = parts[0]
if isinstance(first_part, dict):
text = first_part.get("text")
# 只有纯文本块才拆分
if isinstance(text, str) and len(first_part) == 1:
return text, "gemini"
return None, "unknown"
def _create_openai_chunk(
self,
original_data: dict,
new_content: str,
is_first: bool = False,
) -> bytes:
"""创建 OpenAI 格式的 SSE chunk"""
new_data = original_data.copy()
if "choices" in new_data and new_data["choices"]:
new_choices = []
for choice in new_data["choices"]:
new_choice = choice.copy()
if "delta" in new_choice:
new_delta = {}
# 只有第一个 chunk 保留 role
if is_first and "role" in new_choice["delta"]:
new_delta["role"] = new_choice["delta"]["role"]
new_delta["content"] = new_content
new_choice["delta"] = new_delta
new_choices.append(new_choice)
new_data["choices"] = new_choices
return f"data: {json.dumps(new_data, ensure_ascii=False)}\n\n".encode("utf-8")
def _create_claude_chunk(
self,
original_data: dict,
new_content: str,
event_type: str,
) -> bytes:
"""创建 Claude 格式的 SSE chunk"""
new_data = original_data.copy()
if "delta" in new_data:
new_delta = new_data["delta"].copy()
new_delta["text"] = new_content
new_data["delta"] = new_delta
# Claude 格式需要 event: 前缀
return f"event: {event_type}\ndata: {json.dumps(new_data, ensure_ascii=False)}\n\n".encode(
"utf-8"
)
def _create_gemini_chunk(
self,
original_data: dict,
new_content: str,
) -> bytes:
"""创建 Gemini 格式的 SSE chunk"""
new_data = copy.deepcopy(original_data)
if "candidates" in new_data and new_data["candidates"]:
first_candidate = new_data["candidates"][0]
if "content" in first_candidate:
content = first_candidate["content"]
if "parts" in content and content["parts"]:
content["parts"][0]["text"] = new_content
return f"data: {json.dumps(new_data, ensure_ascii=False)}\n\n".encode("utf-8")
async def smooth_stream(
self,
byte_iterator: AsyncGenerator[bytes, None],
) -> AsyncGenerator[bytes, None]:
"""
对字节流进行平滑处理
解析 SSE 事件,拆分大 content添加延迟后输出。
Args:
byte_iterator: 原始字节流
Yields:
平滑处理后的字节块
"""
buffer = b""
is_first_content = True
async for chunk in byte_iterator:
buffer += chunk
# 按双换行分割 SSE 事件(标准 SSE 格式)
while b"\n\n" in buffer:
event_block, buffer = buffer.split(b"\n\n", 1)
event_str = event_block.decode("utf-8", errors="replace")
# 解析事件块
lines = event_str.strip().split("\n")
data_str = None
event_type = ""
for line in lines:
line = line.rstrip("\r")
if line.startswith("event: "):
event_type = line[7:].strip()
elif line.startswith("data: "):
data_str = line[6:]
# 没有 data 行,直接透传
if data_str is None:
yield event_block + b"\n\n"
continue
# [DONE] 直接透传
if data_str.strip() == "[DONE]":
yield event_block + b"\n\n"
continue
# 尝试解析 JSON
try:
data = json.loads(data_str)
except json.JSONDecodeError:
yield event_block + b"\n\n"
continue
# 提取 content 和格式
content, fmt = self._extract_content(data)
if content and len(content) > self.chunk_size:
# 需要拆分
content_chunks = self._split_content(content)
for i, sub_content in enumerate(content_chunks):
is_first = is_first_content and i == 0
if fmt == "openai":
sse_chunk = self._create_openai_chunk(data, sub_content, is_first)
elif fmt == "claude":
sse_chunk = self._create_claude_chunk(
data, sub_content, event_type or "content_block_delta"
)
elif fmt == "gemini":
sse_chunk = self._create_gemini_chunk(data, sub_content)
else:
# 未知格式,透传原始事件
yield event_block + b"\n\n"
break
yield sse_chunk
# 除了最后一个块,其他块之间加延迟
if i < len(content_chunks) - 1:
await asyncio.sleep(self.delay_seconds)
else:
is_first_content = False
else:
# 不需要拆分,直接透传
yield event_block + b"\n\n"
if content:
is_first_content = False
# 处理剩余数据
if buffer:
yield buffer

View File

@@ -81,15 +81,7 @@ class SystemConfigService:
# 流式平滑输出配置
"stream_smoothing_enabled": {
"value": False,
"description": "是否启用流式平滑输出,将大 chunk 拆分成小块模拟打字效果",
},
"stream_smoothing_chunk_size": {
"value": 5,
"description": "流式平滑输出每个小块的字符数",
},
"stream_smoothing_delay_ms": {
"value": 15,
"description": "流式平滑输出每个小块之间的延迟毫秒数",
"description": "是否启用流式平滑输出,自动根据文本长度调整输出速度",
},
}