refactor: 重构流式处理模块,提取 StreamContext/Processor/Telemetry

- 将 chat_handler_base.py 中的流式处理逻辑拆分为三个独立模块:
  - StreamContext: 类型安全的流式上下文数据类,替代原有的 ctx dict
  - StreamProcessor: SSE 解析、预读、嵌套错误检测
  - StreamTelemetryRecorder: 统计记录(Usage/Audit/Candidate)
- 将硬编码配置外置到 settings.py,支持环境变量覆盖:
  - HTTP 超时配置(connect/write/pool)
  - 流式处理配置(预读行数、统计延迟)
  - 并发控制配置(槽位 TTL、缓存预留比例)
This commit is contained in:
fawney19
2025-12-12 15:42:45 +08:00
parent 39defce71c
commit 53bf74429e
6 changed files with 922 additions and 600 deletions

View File

@@ -13,7 +13,7 @@ import asyncio
import math
import os
from contextlib import asynccontextmanager
from datetime import timedelta
from datetime import timedelta # noqa: F401 - kept for potential future use
from typing import Optional, Tuple
import redis.asyncio as aioredis
@@ -185,8 +185,8 @@ class ConcurrencyManager:
key_id: str,
key_max_concurrent: Optional[int],
is_cached_user: bool = False, # 新增:是否是缓存用户
cache_reservation_ratio: float = 0.3, # 新增:缓存预留比例
ttl_seconds: int = 600, # 10分钟 TTL防止死锁
cache_reservation_ratio: Optional[float] = None, # 缓存预留比例None 时从配置读取
ttl_seconds: Optional[int] = None, # TTL 秒数None 时从配置读取
) -> bool:
"""
尝试获取并发槽位(支持缓存用户优先级)
@@ -197,8 +197,8 @@ class ConcurrencyManager:
key_id: ProviderAPIKey ID
key_max_concurrent: Key 最大并发数(None 表示不限制)
is_cached_user: 是否是缓存用户(缓存用户可使用全部槽位)
cache_reservation_ratio: 缓存预留比例(默认30%,只对新用户生效)
ttl_seconds: TTL 秒数,防止异常情况下的死锁
cache_reservation_ratio: 缓存预留比例,None 时从配置读取
ttl_seconds: TTL 秒数,None 时从配置读取
Returns:
是否成功获取(True/False)
@@ -209,6 +209,14 @@ class ConcurrencyManager:
- 缓存用户最多使用: 10个槽位(全部)
- 预留的3个槽位专门给缓存用户,保证他们的请求优先
"""
# 从配置读取默认值
from src.config.settings import config
if cache_reservation_ratio is None:
cache_reservation_ratio = config.cache_reservation_ratio
if ttl_seconds is None:
ttl_seconds = config.concurrency_slot_ttl
if self._redis is None:
async with self._memory_lock:
endpoint_count = self._memory_endpoint_counts.get(endpoint_id, 0)
@@ -426,7 +434,7 @@ class ConcurrencyManager:
key_id: str,
key_max_concurrent: Optional[int],
is_cached_user: bool = False, # 新增:是否是缓存用户
cache_reservation_ratio: float = 0.3, # 新增:缓存预留比例
cache_reservation_ratio: Optional[float] = None, # 缓存预留比例None 时从配置读取
):
"""
并发控制上下文管理器(支持缓存用户优先级)
@@ -441,6 +449,12 @@ class ConcurrencyManager:
如果获取失败,会抛出 ConcurrencyLimitError 异常
"""
# 从配置读取默认值
from src.config.settings import config
if cache_reservation_ratio is None:
cache_reservation_ratio = config.cache_reservation_ratio
# 尝试获取槽位(传递缓存用户参数)
acquired = await self.acquire_slot(
endpoint_id,