diff --git a/frontend/src/views/admin/SystemSettings.vue b/frontend/src/views/admin/SystemSettings.vue index 60dfb33..a63425b 100644 --- a/frontend/src/views/admin/SystemSettings.vue +++ b/frontend/src/views/admin/SystemSettings.vue @@ -470,20 +470,68 @@ title="流式输出" description="配置流式响应的输出效果" > -
- +
+
+
+ +
+ +

+ 将上游返回的大块内容拆分成小块,模拟打字效果 +

+
+
+
+
-

- 自动根据文本长度调整输出速度:短文本逐字符输出(打字感更强),长文本按块输出(避免卡顿) + +

+ 每次输出的字符数量(1-100) +

+
+ +
+ + +

+ 每块之间的延迟毫秒数(1-100)

@@ -838,6 +886,8 @@ interface SystemConfig { audit_log_retention_days: number // 流式输出 stream_smoothing_enabled: boolean + stream_smoothing_chunk_size: number + stream_smoothing_delay_ms: number } const loading = ref(false) @@ -889,6 +939,8 @@ const systemConfig = ref({ audit_log_retention_days: 30, // 流式输出 stream_smoothing_enabled: false, + stream_smoothing_chunk_size: 20, + stream_smoothing_delay_ms: 8, }) // 计算属性:KB 和 字节 之间的转换 @@ -947,6 +999,8 @@ async function loadSystemConfig() { 'audit_log_retention_days', // 流式输出 'stream_smoothing_enabled', + 'stream_smoothing_chunk_size', + 'stream_smoothing_delay_ms', ] for (const key of configs) { @@ -1060,6 +1114,16 @@ async function saveSystemConfig() { value: systemConfig.value.stream_smoothing_enabled, description: '是否启用流式平滑输出' }, + { + key: 'stream_smoothing_chunk_size', + value: systemConfig.value.stream_smoothing_chunk_size, + description: '流式平滑输出每个小块的字符数' + }, + { + key: 'stream_smoothing_delay_ms', + value: systemConfig.value.stream_smoothing_delay_ms, + description: '流式平滑输出每个小块之间的延迟毫秒数' + }, ] const promises = configItems.map(item => diff --git a/src/api/admin/providers/models.py b/src/api/admin/providers/models.py index 8b84ca6..9b56366 100644 --- a/src/api/admin/providers/models.py +++ b/src/api/admin/providers/models.py @@ -9,6 +9,7 @@ from fastapi import APIRouter, Depends, Request from sqlalchemy.orm import Session, joinedload from src.api.base.admin_adapter import AdminApiAdapter +from src.api.base.models_service import invalidate_models_list_cache from src.api.base.pipeline import ApiRequestPipeline from src.core.exceptions import InvalidRequestException, NotFoundException from src.core.logger import logger @@ -419,4 +420,8 @@ class AdminBatchAssignModelsToProviderAdapter(AdminApiAdapter): f"Batch assigned {len(success)} GlobalModels to provider {provider.name} by {context.user.username}" ) + # 清除 /v1/models 列表缓存 + if success: + await invalidate_models_list_cache() + return BatchAssignModelsToProviderResponse(success=success, errors=errors) diff --git a/src/api/base/models_service.py b/src/api/base/models_service.py index d4aeb67..ee9cdfa 100644 --- a/src/api/base/models_service.py +++ b/src/api/base/models_service.py @@ -55,6 +55,23 @@ async def _set_cached_models(api_formats: list[str], models: list["ModelInfo"]) logger.warning(f"[ModelsService] 缓存写入失败: {e}") +async def invalidate_models_list_cache() -> None: + """ + 清除所有 /v1/models 列表缓存 + + 在模型创建、更新、删除时调用,确保模型列表实时更新 + """ + # 清除所有格式的缓存 + all_formats = ["CLAUDE", "OPENAI", "GEMINI"] + for fmt in all_formats: + cache_key = f"{_CACHE_KEY_PREFIX}:{fmt}" + try: + await CacheService.delete(cache_key) + logger.debug(f"[ModelsService] 已清除缓存: {cache_key}") + except Exception as e: + logger.warning(f"[ModelsService] 清除缓存失败 {cache_key}: {e}") + + @dataclass class ModelInfo: """统一的模型信息结构""" diff --git a/src/api/handlers/base/chat_handler_base.py b/src/api/handlers/base/chat_handler_base.py index 417aeec..ceb10d3 100644 --- a/src/api/handlers/base/chat_handler_base.py +++ b/src/api/handlers/base/chat_handler_base.py @@ -32,7 +32,7 @@ from src.api.handlers.base.parsers import get_parser_for_format from src.api.handlers.base.request_builder import PassthroughRequestBuilder from src.api.handlers.base.response_parser import ResponseParser from src.api.handlers.base.stream_context import StreamContext -from src.api.handlers.base.stream_processor import StreamProcessor, StreamSmoothingConfig +from src.api.handlers.base.stream_processor import StreamProcessor from src.api.handlers.base.stream_telemetry import StreamTelemetryRecorder from src.api.handlers.base.utils import build_sse_headers from src.config.settings import config @@ -52,7 +52,6 @@ from src.models.database import ( User, ) from src.services.provider.transport import build_provider_url -from src.services.system.config import SystemConfigService @@ -298,18 +297,11 @@ class ChatHandlerBase(BaseMessageHandler, ABC): def update_streaming_status() -> None: self._update_usage_to_streaming_with_ctx(ctx) - # 读取流式平滑输出开关 - smoothing_enabled = bool( - SystemConfigService.get_config(self.db, "stream_smoothing_enabled", False) - ) - smoothing_config = StreamSmoothingConfig(enabled=smoothing_enabled) - # 创建流处理器 stream_processor = StreamProcessor( request_id=self.request_id, default_parser=self.parser, on_streaming_start=update_streaming_status, - smoothing_config=smoothing_config, ) # 定义请求函数 @@ -387,11 +379,8 @@ class ChatHandlerBase(BaseMessageHandler, ABC): http_request.is_disconnected, ) - # 创建平滑输出流(如果启用) - smoothed_stream = stream_processor.create_smoothed_stream(monitored_stream) - return StreamingResponse( - smoothed_stream, + monitored_stream, media_type="text/event-stream", headers=build_sse_headers(), background=background_tasks, diff --git a/src/api/handlers/base/cli_handler_base.py b/src/api/handlers/base/cli_handler_base.py index e4b863e..0d8ca16 100644 --- a/src/api/handlers/base/cli_handler_base.py +++ b/src/api/handlers/base/cli_handler_base.py @@ -34,9 +34,7 @@ from src.api.handlers.base.base_handler import ( from src.api.handlers.base.parsers import get_parser_for_format from src.api.handlers.base.request_builder import PassthroughRequestBuilder from src.api.handlers.base.stream_context import StreamContext -from src.api.handlers.base.stream_processor import create_smoothed_stream from src.api.handlers.base.utils import build_sse_headers -from src.services.system.config import SystemConfigService # 直接从具体模块导入,避免循环依赖 from src.api.handlers.base.response_parser import ( @@ -354,17 +352,8 @@ class CliMessageHandlerBase(BaseMessageHandler): # 创建监控流 monitored_stream = self._create_monitored_stream(ctx, stream_generator) - # 创建平滑输出流(如果启用) - smoothing_enabled = bool( - SystemConfigService.get_config(self.db, "stream_smoothing_enabled", False) - ) - if smoothing_enabled: - final_stream = create_smoothed_stream(monitored_stream) - else: - final_stream = monitored_stream - return StreamingResponse( - final_stream, + monitored_stream, media_type="text/event-stream", headers=build_sse_headers(), background=background_tasks, diff --git a/src/api/handlers/base/stream_processor.py b/src/api/handlers/base/stream_processor.py index 07fbe42..275de5d 100644 --- a/src/api/handlers/base/stream_processor.py +++ b/src/api/handlers/base/stream_processor.py @@ -12,7 +12,6 @@ import asyncio import codecs import json -import math from dataclasses import dataclass from typing import Any, AsyncGenerator, Callable, Optional @@ -37,6 +36,8 @@ class StreamSmoothingConfig: """流式平滑输出配置""" enabled: bool = False + chunk_size: int = 20 + delay_ms: int = 8 class StreamProcessor: @@ -47,13 +48,6 @@ class StreamProcessor: 从 ChatHandlerBase 中提取,使其职责更加单一。 """ - # 平滑输出参数 - CHUNK_SIZE = 20 # 每块字符数 - MIN_DELAY_MS = 8 # 长文本延迟(毫秒) - MAX_DELAY_MS = 15 # 短文本延迟(毫秒) - SHORT_TEXT_THRESHOLD = 20 # 短文本阈值 - LONG_TEXT_THRESHOLD = 100 # 长文本阈值 - def __init__( self, request_id: str, @@ -548,10 +542,10 @@ class StreamProcessor: # 只有内容长度大于 1 才需要平滑处理 if content and len(content) > 1 and extractor: - # 计算动态延迟 - delay_seconds = self._calculate_delay(len(content)) + # 获取配置的延迟 + delay_seconds = self._calculate_delay() - # 智能拆分 + # 拆分内容 content_chunks = self._split_content(content) for i, sub_content in enumerate(content_chunks): @@ -610,40 +604,24 @@ class StreamProcessor: return None, None - def _calculate_delay(self, text_length: int) -> float: - """ - 根据文本长度计算动态延迟(秒) - - 短文本使用较大延迟(打字感更强),长文本使用较小延迟(避免卡顿)。 - 中间长度使用对数插值平滑过渡。 - """ - if text_length <= self.SHORT_TEXT_THRESHOLD: - return self.MAX_DELAY_MS / 1000.0 - if text_length >= self.LONG_TEXT_THRESHOLD: - return self.MIN_DELAY_MS / 1000.0 - - # 对数插值:平滑过渡 - ratio = math.log(text_length / self.SHORT_TEXT_THRESHOLD) / math.log( - self.LONG_TEXT_THRESHOLD / self.SHORT_TEXT_THRESHOLD - ) - delay_ms = self.MAX_DELAY_MS - ratio * (self.MAX_DELAY_MS - self.MIN_DELAY_MS) - return delay_ms / 1000.0 + def _calculate_delay(self) -> float: + """获取配置的延迟(秒)""" + return self.smoothing_config.delay_ms / 1000.0 def _split_content(self, content: str) -> list[str]: """ 按块拆分文本 - - 统一使用 CHUNK_SIZE 拆分,通过动态延迟控制打字感。 """ + chunk_size = self.smoothing_config.chunk_size text_length = len(content) - if text_length <= self.CHUNK_SIZE: + if text_length <= chunk_size: return [content] - # 统一按块拆分 + # 按块拆分 chunks = [] - for i in range(0, text_length, self.CHUNK_SIZE): - chunks.append(content[i : i + self.CHUNK_SIZE]) + for i in range(0, text_length, chunk_size): + chunks.append(content[i : i + chunk_size]) return chunks async def _cleanup( @@ -664,6 +642,8 @@ class StreamProcessor: async def create_smoothed_stream( stream_generator: AsyncGenerator[bytes, None], + chunk_size: int = 20, + delay_ms: int = 8, ) -> AsyncGenerator[bytes, None]: """ 独立的平滑流生成函数 @@ -672,11 +652,13 @@ async def create_smoothed_stream( Args: stream_generator: 原始流生成器 + chunk_size: 每块字符数 + delay_ms: 每块之间的延迟毫秒数 Yields: 平滑处理后的响应数据块 """ - processor = _LightweightSmoother() + processor = _LightweightSmoother(chunk_size=chunk_size, delay_ms=delay_ms) async for chunk in processor.smooth(stream_generator): yield chunk @@ -688,13 +670,9 @@ class _LightweightSmoother: 只包含平滑输出所需的最小逻辑,不依赖 StreamProcessor 的其他功能。 """ - CHUNK_SIZE = 20 - MIN_DELAY_MS = 8 - MAX_DELAY_MS = 15 - SHORT_TEXT_THRESHOLD = 20 - LONG_TEXT_THRESHOLD = 100 - - def __init__(self) -> None: + def __init__(self, chunk_size: int = 20, delay_ms: int = 8) -> None: + self.chunk_size = chunk_size + self.delay_ms = delay_ms self._extractors: dict[str, ContentExtractor] = {} def _get_extractor(self, format_name: str) -> Optional[ContentExtractor]: @@ -715,21 +693,14 @@ class _LightweightSmoother: return content, extractor return None, None - def _calculate_delay(self, text_length: int) -> float: - if text_length <= self.SHORT_TEXT_THRESHOLD: - return self.MAX_DELAY_MS / 1000.0 - if text_length >= self.LONG_TEXT_THRESHOLD: - return self.MIN_DELAY_MS / 1000.0 - ratio = math.log(text_length / self.SHORT_TEXT_THRESHOLD) / math.log( - self.LONG_TEXT_THRESHOLD / self.SHORT_TEXT_THRESHOLD - ) - return (self.MAX_DELAY_MS - ratio * (self.MAX_DELAY_MS - self.MIN_DELAY_MS)) / 1000.0 + def _calculate_delay(self) -> float: + return self.delay_ms / 1000.0 def _split_content(self, content: str) -> list[str]: text_length = len(content) - if text_length <= self.CHUNK_SIZE: + if text_length <= self.chunk_size: return [content] - return [content[i : i + self.CHUNK_SIZE] for i in range(0, text_length, self.CHUNK_SIZE)] + return [content[i : i + self.chunk_size] for i in range(0, text_length, self.chunk_size)] async def smooth( self, stream_generator: AsyncGenerator[bytes, None] @@ -772,7 +743,7 @@ class _LightweightSmoother: content, extractor = self._detect_format_and_extract(data) if content and len(content) > 1 and extractor: - delay_seconds = self._calculate_delay(len(content)) + delay_seconds = self._calculate_delay() content_chunks = self._split_content(content) for i, sub_content in enumerate(content_chunks): diff --git a/src/services/model/service.py b/src/services/model/service.py index 1be69d1..764701c 100644 --- a/src/services/model/service.py +++ b/src/services/model/service.py @@ -13,6 +13,7 @@ from src.core.exceptions import InvalidRequestException, NotFoundException from src.core.logger import logger from src.models.api import ModelCreate, ModelResponse, ModelUpdate from src.models.database import Model, Provider +from src.api.base.models_service import invalidate_models_list_cache from src.services.cache.invalidation import get_cache_invalidation_service from src.services.cache.model_cache import ModelCacheService @@ -75,6 +76,10 @@ class ModelService: ) logger.info(f"创建模型成功: provider={provider.name}, model={model.provider_model_name}, global_model_id={model.global_model_id}") + + # 清除 /v1/models 列表缓存 + asyncio.create_task(invalidate_models_list_cache()) + return model except IntegrityError as e: @@ -197,6 +202,9 @@ class ModelService: cache_service = get_cache_invalidation_service() cache_service.on_model_changed(model.provider_id, model.global_model_id) + # 清除 /v1/models 列表缓存 + asyncio.create_task(invalidate_models_list_cache()) + logger.info(f"更新模型成功: id={model_id}, 最终 supports_vision: {model.supports_vision}, supports_function_calling: {model.supports_function_calling}, supports_extended_thinking: {model.supports_extended_thinking}") return model except IntegrityError as e: @@ -261,6 +269,9 @@ class ModelService: cache_service = get_cache_invalidation_service() cache_service.on_model_changed(cache_info["provider_id"], cache_info["global_model_id"]) + # 清除 /v1/models 列表缓存 + asyncio.create_task(invalidate_models_list_cache()) + logger.info(f"删除模型成功: id={model_id}, provider_model_name={cache_info['provider_model_name']}, " f"global_model_id={cache_info['global_model_id'][:8] if cache_info['global_model_id'] else 'None'}...") except Exception as e: @@ -295,6 +306,9 @@ class ModelService: cache_service = get_cache_invalidation_service() cache_service.on_model_changed(model.provider_id, model.global_model_id) + # 清除 /v1/models 列表缓存 + asyncio.create_task(invalidate_models_list_cache()) + status = "可用" if is_available else "不可用" logger.info(f"更新模型可用状态: id={model_id}, status={status}") return model @@ -358,6 +372,9 @@ class ModelService: for model in created_models: db.refresh(model) logger.info(f"批量创建 {len(created_models)} 个模型成功") + + # 清除 /v1/models 列表缓存 + asyncio.create_task(invalidate_models_list_cache()) except IntegrityError as e: db.rollback() logger.error(f"批量创建模型失败: {str(e)}") diff --git a/src/services/system/config.py b/src/services/system/config.py index 3ff9e32..cfa9210 100644 --- a/src/services/system/config.py +++ b/src/services/system/config.py @@ -83,6 +83,14 @@ class SystemConfigService: "value": False, "description": "是否启用流式平滑输出,自动根据文本长度调整输出速度", }, + "stream_smoothing_chunk_size": { + "value": 20, + "description": "流式平滑输出每个小块的字符数", + }, + "stream_smoothing_delay_ms": { + "value": 8, + "description": "流式平滑输出每个小块之间的延迟毫秒数", + }, } @classmethod