Mirror of https://github.com/fawney19/Aether.git (synced 2026-01-03 08:12:26 +08:00)
refactor: make stream smoothing parameters configurable and add models cache invalidation
- Move stream smoothing parameters (chunk_size, delay_ms) to database config
- Remove hardcoded stream smoothing constants from StreamProcessor
- Simplify dynamic delay calculation by using config values directly
- Add invalidate_models_list_cache() function to clear /v1/models endpoint cache
- Call cache invalidation on model create, update, delete, and bulk operations
- Update admin UI to allow runtime configuration of smoothing parameters
- Improve model listing freshness when models are modified
This commit is contained in:
@@ -32,7 +32,7 @@ from src.api.handlers.base.parsers import get_parser_for_format
|
||||
from src.api.handlers.base.request_builder import PassthroughRequestBuilder
|
||||
from src.api.handlers.base.response_parser import ResponseParser
|
||||
from src.api.handlers.base.stream_context import StreamContext
|
||||
from src.api.handlers.base.stream_processor import StreamProcessor, StreamSmoothingConfig
|
||||
from src.api.handlers.base.stream_processor import StreamProcessor
|
||||
from src.api.handlers.base.stream_telemetry import StreamTelemetryRecorder
|
||||
from src.api.handlers.base.utils import build_sse_headers
|
||||
from src.config.settings import config
|
||||
@@ -52,7 +52,6 @@ from src.models.database import (
|
||||
User,
|
||||
)
|
||||
from src.services.provider.transport import build_provider_url
|
||||
from src.services.system.config import SystemConfigService
|
||||
|
||||
|
||||
|
||||
@@ -298,18 +297,11 @@ class ChatHandlerBase(BaseMessageHandler, ABC):
|
||||
def update_streaming_status() -> None:
|
||||
self._update_usage_to_streaming_with_ctx(ctx)
|
||||
|
||||
# 读取流式平滑输出开关
|
||||
smoothing_enabled = bool(
|
||||
SystemConfigService.get_config(self.db, "stream_smoothing_enabled", False)
|
||||
)
|
||||
smoothing_config = StreamSmoothingConfig(enabled=smoothing_enabled)
|
||||
|
||||
# 创建流处理器
|
||||
stream_processor = StreamProcessor(
|
||||
request_id=self.request_id,
|
||||
default_parser=self.parser,
|
||||
on_streaming_start=update_streaming_status,
|
||||
smoothing_config=smoothing_config,
|
||||
)
|
||||
|
||||
# 定义请求函数
|
||||
@@ -387,11 +379,8 @@ class ChatHandlerBase(BaseMessageHandler, ABC):
|
||||
http_request.is_disconnected,
|
||||
)
|
||||
|
||||
# 创建平滑输出流(如果启用)
|
||||
smoothed_stream = stream_processor.create_smoothed_stream(monitored_stream)
|
||||
|
||||
return StreamingResponse(
|
||||
smoothed_stream,
|
||||
monitored_stream,
|
||||
media_type="text/event-stream",
|
||||
headers=build_sse_headers(),
|
||||
background=background_tasks,
|
||||
|
||||
@@ -34,9 +34,7 @@ from src.api.handlers.base.base_handler import (
|
||||
from src.api.handlers.base.parsers import get_parser_for_format
|
||||
from src.api.handlers.base.request_builder import PassthroughRequestBuilder
|
||||
from src.api.handlers.base.stream_context import StreamContext
|
||||
from src.api.handlers.base.stream_processor import create_smoothed_stream
|
||||
from src.api.handlers.base.utils import build_sse_headers
|
||||
from src.services.system.config import SystemConfigService
|
||||
|
||||
# 直接从具体模块导入,避免循环依赖
|
||||
from src.api.handlers.base.response_parser import (
|
||||
@@ -354,17 +352,8 @@ class CliMessageHandlerBase(BaseMessageHandler):
|
||||
# 创建监控流
|
||||
monitored_stream = self._create_monitored_stream(ctx, stream_generator)
|
||||
|
||||
# 创建平滑输出流(如果启用)
|
||||
smoothing_enabled = bool(
|
||||
SystemConfigService.get_config(self.db, "stream_smoothing_enabled", False)
|
||||
)
|
||||
if smoothing_enabled:
|
||||
final_stream = create_smoothed_stream(monitored_stream)
|
||||
else:
|
||||
final_stream = monitored_stream
|
||||
|
||||
return StreamingResponse(
|
||||
final_stream,
|
||||
monitored_stream,
|
||||
media_type="text/event-stream",
|
||||
headers=build_sse_headers(),
|
||||
background=background_tasks,
|
||||
|
||||
@@ -12,7 +12,6 @@
|
||||
import asyncio
|
||||
import codecs
|
||||
import json
|
||||
import math
|
||||
from dataclasses import dataclass
|
||||
from typing import Any, AsyncGenerator, Callable, Optional
|
||||
|
||||
@@ -37,6 +36,8 @@ class StreamSmoothingConfig:
|
||||
"""流式平滑输出配置"""
|
||||
|
||||
enabled: bool = False
|
||||
chunk_size: int = 20
|
||||
delay_ms: int = 8
|
||||
|
||||
|
||||
class StreamProcessor:
|
||||
@@ -47,13 +48,6 @@ class StreamProcessor:
|
||||
从 ChatHandlerBase 中提取,使其职责更加单一。
|
||||
"""
|
||||
|
||||
# 平滑输出参数
|
||||
CHUNK_SIZE = 20 # 每块字符数
|
||||
MIN_DELAY_MS = 8 # 长文本延迟(毫秒)
|
||||
MAX_DELAY_MS = 15 # 短文本延迟(毫秒)
|
||||
SHORT_TEXT_THRESHOLD = 20 # 短文本阈值
|
||||
LONG_TEXT_THRESHOLD = 100 # 长文本阈值
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
request_id: str,
|
||||
@@ -548,10 +542,10 @@ class StreamProcessor:
|
||||
|
||||
# 只有内容长度大于 1 才需要平滑处理
|
||||
if content and len(content) > 1 and extractor:
|
||||
# 计算动态延迟
|
||||
delay_seconds = self._calculate_delay(len(content))
|
||||
# 获取配置的延迟
|
||||
delay_seconds = self._calculate_delay()
|
||||
|
||||
# 智能拆分
|
||||
# 拆分内容
|
||||
content_chunks = self._split_content(content)
|
||||
|
||||
for i, sub_content in enumerate(content_chunks):
|
||||
@@ -610,40 +604,24 @@ class StreamProcessor:
|
||||
|
||||
return None, None
|
||||
|
||||
def _calculate_delay(self, text_length: int) -> float:
|
||||
"""
|
||||
根据文本长度计算动态延迟(秒)
|
||||
|
||||
短文本使用较大延迟(打字感更强),长文本使用较小延迟(避免卡顿)。
|
||||
中间长度使用对数插值平滑过渡。
|
||||
"""
|
||||
if text_length <= self.SHORT_TEXT_THRESHOLD:
|
||||
return self.MAX_DELAY_MS / 1000.0
|
||||
if text_length >= self.LONG_TEXT_THRESHOLD:
|
||||
return self.MIN_DELAY_MS / 1000.0
|
||||
|
||||
# 对数插值:平滑过渡
|
||||
ratio = math.log(text_length / self.SHORT_TEXT_THRESHOLD) / math.log(
|
||||
self.LONG_TEXT_THRESHOLD / self.SHORT_TEXT_THRESHOLD
|
||||
)
|
||||
delay_ms = self.MAX_DELAY_MS - ratio * (self.MAX_DELAY_MS - self.MIN_DELAY_MS)
|
||||
return delay_ms / 1000.0
|
||||
def _calculate_delay(self) -> float:
|
||||
"""获取配置的延迟(秒)"""
|
||||
return self.smoothing_config.delay_ms / 1000.0
|
||||
|
||||
def _split_content(self, content: str) -> list[str]:
|
||||
"""
|
||||
按块拆分文本
|
||||
|
||||
统一使用 CHUNK_SIZE 拆分,通过动态延迟控制打字感。
|
||||
"""
|
||||
chunk_size = self.smoothing_config.chunk_size
|
||||
text_length = len(content)
|
||||
|
||||
if text_length <= self.CHUNK_SIZE:
|
||||
if text_length <= chunk_size:
|
||||
return [content]
|
||||
|
||||
# 统一按块拆分
|
||||
# 按块拆分
|
||||
chunks = []
|
||||
for i in range(0, text_length, self.CHUNK_SIZE):
|
||||
chunks.append(content[i : i + self.CHUNK_SIZE])
|
||||
for i in range(0, text_length, chunk_size):
|
||||
chunks.append(content[i : i + chunk_size])
|
||||
return chunks
|
||||
|
||||
async def _cleanup(
|
||||
@@ -664,6 +642,8 @@ class StreamProcessor:
|
||||
|
||||
async def create_smoothed_stream(
|
||||
stream_generator: AsyncGenerator[bytes, None],
|
||||
chunk_size: int = 20,
|
||||
delay_ms: int = 8,
|
||||
) -> AsyncGenerator[bytes, None]:
|
||||
"""
|
||||
独立的平滑流生成函数
|
||||
@@ -672,11 +652,13 @@ async def create_smoothed_stream(
|
||||
|
||||
Args:
|
||||
stream_generator: 原始流生成器
|
||||
chunk_size: 每块字符数
|
||||
delay_ms: 每块之间的延迟毫秒数
|
||||
|
||||
Yields:
|
||||
平滑处理后的响应数据块
|
||||
"""
|
||||
processor = _LightweightSmoother()
|
||||
processor = _LightweightSmoother(chunk_size=chunk_size, delay_ms=delay_ms)
|
||||
async for chunk in processor.smooth(stream_generator):
|
||||
yield chunk
|
||||
|
||||
@@ -688,13 +670,9 @@ class _LightweightSmoother:
|
||||
只包含平滑输出所需的最小逻辑,不依赖 StreamProcessor 的其他功能。
|
||||
"""
|
||||
|
||||
CHUNK_SIZE = 20
|
||||
MIN_DELAY_MS = 8
|
||||
MAX_DELAY_MS = 15
|
||||
SHORT_TEXT_THRESHOLD = 20
|
||||
LONG_TEXT_THRESHOLD = 100
|
||||
|
||||
def __init__(self) -> None:
|
||||
def __init__(self, chunk_size: int = 20, delay_ms: int = 8) -> None:
|
||||
self.chunk_size = chunk_size
|
||||
self.delay_ms = delay_ms
|
||||
self._extractors: dict[str, ContentExtractor] = {}
|
||||
|
||||
def _get_extractor(self, format_name: str) -> Optional[ContentExtractor]:
|
||||
@@ -715,21 +693,14 @@ class _LightweightSmoother:
|
||||
return content, extractor
|
||||
return None, None
|
||||
|
||||
def _calculate_delay(self, text_length: int) -> float:
|
||||
if text_length <= self.SHORT_TEXT_THRESHOLD:
|
||||
return self.MAX_DELAY_MS / 1000.0
|
||||
if text_length >= self.LONG_TEXT_THRESHOLD:
|
||||
return self.MIN_DELAY_MS / 1000.0
|
||||
ratio = math.log(text_length / self.SHORT_TEXT_THRESHOLD) / math.log(
|
||||
self.LONG_TEXT_THRESHOLD / self.SHORT_TEXT_THRESHOLD
|
||||
)
|
||||
return (self.MAX_DELAY_MS - ratio * (self.MAX_DELAY_MS - self.MIN_DELAY_MS)) / 1000.0
|
||||
def _calculate_delay(self) -> float:
|
||||
return self.delay_ms / 1000.0
|
||||
|
||||
def _split_content(self, content: str) -> list[str]:
|
||||
text_length = len(content)
|
||||
if text_length <= self.CHUNK_SIZE:
|
||||
if text_length <= self.chunk_size:
|
||||
return [content]
|
||||
return [content[i : i + self.CHUNK_SIZE] for i in range(0, text_length, self.CHUNK_SIZE)]
|
||||
return [content[i : i + self.chunk_size] for i in range(0, text_length, self.chunk_size)]
|
||||
|
||||
async def smooth(
|
||||
self, stream_generator: AsyncGenerator[bytes, None]
|
||||
@@ -772,7 +743,7 @@ class _LightweightSmoother:
|
||||
content, extractor = self._detect_format_and_extract(data)
|
||||
|
||||
if content and len(content) > 1 and extractor:
|
||||
delay_seconds = self._calculate_delay(len(content))
|
||||
delay_seconds = self._calculate_delay()
|
||||
content_chunks = self._split_content(content)
|
||||
|
||||
for i, sub_content in enumerate(content_chunks):
|
||||
|
||||
Reference in New Issue
Block a user