feat: add stream smoothing feature for improved user experience

- Implement StreamSmoother class to split large content chunks into smaller pieces with delay
- Support OpenAI, Claude, and Gemini API response formats for smooth playback
- Add stream smoothing configuration to system settings (enable, chunk size, delay)
- Create streamlined API for stream smoothing with StreamSmoothingConfig dataclass
- Add admin UI controls for configuring stream smoothing parameters
- Use batch configuration loading to minimize database queries
- Enable typing effect simulation for better user experience in streaming responses
This commit is contained in:
fawney19
2025-12-19 03:15:19 +08:00
parent daf8b870f0
commit 85fafeacb8
6 changed files with 466 additions and 5 deletions

View File

@@ -32,7 +32,7 @@ from src.api.handlers.base.parsers import get_parser_for_format
from src.api.handlers.base.request_builder import PassthroughRequestBuilder
from src.api.handlers.base.response_parser import ResponseParser
from src.api.handlers.base.stream_context import StreamContext
from src.api.handlers.base.stream_processor import StreamProcessor
from src.api.handlers.base.stream_processor import StreamProcessor, StreamSmoothingConfig
from src.api.handlers.base.stream_telemetry import StreamTelemetryRecorder
from src.api.handlers.base.utils import build_sse_headers
from src.config.settings import config
@@ -52,6 +52,7 @@ from src.models.database import (
User,
)
from src.services.provider.transport import build_provider_url
from src.services.system.config import SystemConfigService
@@ -297,11 +298,23 @@ class ChatHandlerBase(BaseMessageHandler, ABC):
def update_streaming_status() -> None:
self._update_usage_to_streaming_with_ctx(ctx)
# 从数据库批量读取流式平滑输出配置(单次查询)
smoothing_cfg = SystemConfigService.get_configs(
self.db,
["stream_smoothing_enabled", "stream_smoothing_chunk_size", "stream_smoothing_delay_ms"],
)
smoothing_config = StreamSmoothingConfig(
enabled=bool(smoothing_cfg.get("stream_smoothing_enabled", False)),
chunk_size=int(smoothing_cfg.get("stream_smoothing_chunk_size") or 5),
delay_ms=int(smoothing_cfg.get("stream_smoothing_delay_ms") or 15),
)
# 创建流处理器
stream_processor = StreamProcessor(
request_id=self.request_id,
default_parser=self.parser,
on_streaming_start=update_streaming_status,
smoothing_config=smoothing_config,
)
# 定义请求函数
@@ -379,8 +392,11 @@ class ChatHandlerBase(BaseMessageHandler, ABC):
http_request.is_disconnected,
)
# 创建平滑输出流(如果启用)
smoothed_stream = stream_processor.create_smoothed_stream(monitored_stream)
return StreamingResponse(
monitored_stream,
smoothed_stream,
media_type="text/event-stream",
headers=build_sse_headers(),
background=background_tasks,