mirror of
https://github.com/fawney19/Aether.git
synced 2026-01-10 03:32:26 +08:00
feat: 流式预读增强与自适应并发算法优化
流式预读增强: - 新增预读字节上限(64KB),防止无换行响应导致内存增长 - 预读结束后检测非 SSE 格式的错误响应(HTML 页面、纯 JSON 错误) - 抽取 check_html_response 和 check_prefetched_response_error 到 utils.py 自适应并发算法优化(边界记忆 + 渐进探测): - 缩容策略:从乘性减少改为边界 -1,一次 429 即可收敛到真实限制附近 - 扩容策略:普通扩容不超过已知边界,探测性扩容可谨慎突破(每次 +1) - 仅在并发限制 429 时记录边界,避免 RPM/UNKNOWN 类型覆盖
This commit is contained in:
@@ -34,7 +34,11 @@ from src.api.handlers.base.base_handler import (
|
||||
from src.api.handlers.base.parsers import get_parser_for_format
|
||||
from src.api.handlers.base.request_builder import PassthroughRequestBuilder
|
||||
from src.api.handlers.base.stream_context import StreamContext
|
||||
from src.api.handlers.base.utils import build_sse_headers
|
||||
from src.api.handlers.base.utils import (
|
||||
build_sse_headers,
|
||||
check_html_response,
|
||||
check_prefetched_response_error,
|
||||
)
|
||||
from src.core.error_utils import extract_error_message
|
||||
|
||||
# 直接从具体模块导入,避免循环依赖
|
||||
@@ -58,6 +62,7 @@ from src.models.database import (
|
||||
ProviderEndpoint,
|
||||
User,
|
||||
)
|
||||
from src.config.constants import StreamDefaults
|
||||
from src.config.settings import config
|
||||
from src.services.provider.transport import build_provider_url
|
||||
from src.utils.sse_parser import SSEEventParser
|
||||
@@ -703,7 +708,9 @@ class CliMessageHandlerBase(BaseMessageHandler):
|
||||
ProviderTimeoutException: 如果首字节超时(TTFB timeout)
|
||||
"""
|
||||
prefetched_chunks: list = []
|
||||
max_prefetch_lines = 5 # 最多预读5行来检测错误
|
||||
max_prefetch_lines = config.stream_prefetch_lines # 最多预读行数来检测错误
|
||||
max_prefetch_bytes = StreamDefaults.MAX_PREFETCH_BYTES # 避免无换行响应导致 buffer 增长
|
||||
total_prefetched_bytes = 0
|
||||
buffer = b""
|
||||
line_count = 0
|
||||
should_stop = False
|
||||
@@ -730,14 +737,16 @@ class CliMessageHandlerBase(BaseMessageHandler):
|
||||
provider_name=str(provider.name),
|
||||
)
|
||||
prefetched_chunks.append(first_chunk)
|
||||
total_prefetched_bytes += len(first_chunk)
|
||||
buffer += first_chunk
|
||||
|
||||
# 继续读取剩余的预读数据
|
||||
async for chunk in aiter:
|
||||
prefetched_chunks.append(chunk)
|
||||
total_prefetched_bytes += len(chunk)
|
||||
buffer += chunk
|
||||
|
||||
# 尝试按行解析缓冲区
|
||||
# 尝试按行解析缓冲区(SSE 格式)
|
||||
while b"\n" in buffer:
|
||||
line_bytes, buffer = buffer.split(b"\n", 1)
|
||||
try:
|
||||
@@ -754,15 +763,15 @@ class CliMessageHandlerBase(BaseMessageHandler):
|
||||
normalized_line = line.rstrip("\r")
|
||||
|
||||
# 检测 HTML 响应(base_url 配置错误的常见症状)
|
||||
lower_line = normalized_line.lower()
|
||||
if lower_line.startswith("<!doctype") or lower_line.startswith("<html"):
|
||||
if check_html_response(normalized_line):
|
||||
logger.error(
|
||||
f" [{self.request_id}] 检测到 HTML 响应,可能是 base_url 配置错误: "
|
||||
f"Provider={provider.name}, Endpoint={endpoint.id[:8]}..., "
|
||||
f"base_url={endpoint.base_url}"
|
||||
)
|
||||
raise ProviderNotAvailableException(
|
||||
f"提供商 '{provider.name}' 返回了 HTML 页面而非 API 响应,请检查 endpoint 的 base_url 配置是否正确"
|
||||
f"提供商 '{provider.name}' 返回了 HTML 页面而非 API 响应,"
|
||||
f"请检查 endpoint 的 base_url 配置是否正确"
|
||||
)
|
||||
|
||||
if not normalized_line or normalized_line.startswith(":"):
|
||||
@@ -811,9 +820,30 @@ class CliMessageHandlerBase(BaseMessageHandler):
|
||||
should_stop = True
|
||||
break
|
||||
|
||||
# 达到预读字节上限,停止继续预读(避免无换行响应导致内存增长)
|
||||
if not should_stop and total_prefetched_bytes >= max_prefetch_bytes:
|
||||
logger.debug(
|
||||
f" [{self.request_id}] 预读达到字节上限,停止继续预读: "
|
||||
f"Provider={provider.name}, bytes={total_prefetched_bytes}, "
|
||||
f"max_bytes={max_prefetch_bytes}"
|
||||
)
|
||||
break
|
||||
|
||||
if should_stop or line_count >= max_prefetch_lines:
|
||||
break
|
||||
|
||||
# 预读结束后,检查是否为非 SSE 格式的 HTML/JSON 响应
|
||||
# 处理某些代理返回的纯 JSON 错误(可能无换行/多行 JSON)以及 HTML 页面(base_url 配置错误)
|
||||
if not should_stop and prefetched_chunks:
|
||||
check_prefetched_response_error(
|
||||
prefetched_chunks=prefetched_chunks,
|
||||
parser=provider_parser,
|
||||
request_id=self.request_id,
|
||||
provider_name=str(provider.name),
|
||||
endpoint_id=endpoint.id,
|
||||
base_url=endpoint.base_url,
|
||||
)
|
||||
|
||||
except (EmbeddedErrorException, ProviderTimeoutException, ProviderNotAvailableException):
|
||||
# 重新抛出可重试的 Provider 异常,触发故障转移
|
||||
raise
|
||||
|
||||
@@ -25,8 +25,17 @@ from src.api.handlers.base.content_extractors import (
|
||||
from src.api.handlers.base.parsers import get_parser_for_format
|
||||
from src.api.handlers.base.response_parser import ResponseParser
|
||||
from src.api.handlers.base.stream_context import StreamContext
|
||||
from src.api.handlers.base.utils import (
|
||||
check_html_response,
|
||||
check_prefetched_response_error,
|
||||
)
|
||||
from src.config.constants import StreamDefaults
|
||||
from src.config.settings import config
|
||||
from src.core.exceptions import EmbeddedErrorException, ProviderTimeoutException
|
||||
from src.core.exceptions import (
|
||||
EmbeddedErrorException,
|
||||
ProviderNotAvailableException,
|
||||
ProviderTimeoutException,
|
||||
)
|
||||
from src.core.logger import logger
|
||||
from src.models.database import Provider, ProviderEndpoint
|
||||
from src.utils.sse_parser import SSEEventParser
|
||||
@@ -165,6 +174,7 @@ class StreamProcessor:
|
||||
endpoint: ProviderEndpoint,
|
||||
ctx: StreamContext,
|
||||
max_prefetch_lines: int = 5,
|
||||
max_prefetch_bytes: int = StreamDefaults.MAX_PREFETCH_BYTES,
|
||||
) -> list:
|
||||
"""
|
||||
预读流的前几行,检测嵌套错误
|
||||
@@ -180,12 +190,14 @@ class StreamProcessor:
|
||||
endpoint: Endpoint 对象
|
||||
ctx: 流式上下文
|
||||
max_prefetch_lines: 最多预读行数
|
||||
max_prefetch_bytes: 最多预读字节数(避免无换行响应导致 buffer 增长)
|
||||
|
||||
Returns:
|
||||
预读的字节块列表
|
||||
|
||||
Raises:
|
||||
EmbeddedErrorException: 如果检测到嵌套错误
|
||||
ProviderNotAvailableException: 如果检测到 HTML 响应(配置错误)
|
||||
ProviderTimeoutException: 如果首字节超时(TTFB timeout)
|
||||
"""
|
||||
prefetched_chunks: list = []
|
||||
@@ -193,6 +205,7 @@ class StreamProcessor:
|
||||
buffer = b""
|
||||
line_count = 0
|
||||
should_stop = False
|
||||
total_prefetched_bytes = 0
|
||||
# 使用增量解码器处理跨 chunk 的 UTF-8 字符
|
||||
decoder = codecs.getincrementaldecoder("utf-8")(errors="replace")
|
||||
|
||||
@@ -206,11 +219,13 @@ class StreamProcessor:
|
||||
provider_name=str(provider.name),
|
||||
)
|
||||
prefetched_chunks.append(first_chunk)
|
||||
total_prefetched_bytes += len(first_chunk)
|
||||
buffer += first_chunk
|
||||
|
||||
# 继续读取剩余的预读数据
|
||||
async for chunk in aiter:
|
||||
prefetched_chunks.append(chunk)
|
||||
total_prefetched_bytes += len(chunk)
|
||||
buffer += chunk
|
||||
|
||||
# 尝试按行解析缓冲区
|
||||
@@ -228,10 +243,21 @@ class StreamProcessor:
|
||||
|
||||
line_count += 1
|
||||
|
||||
# 检测 HTML 响应(base_url 配置错误的常见症状)
|
||||
if check_html_response(line):
|
||||
logger.error(
|
||||
f" [{self.request_id}] 检测到 HTML 响应,可能是 base_url 配置错误: "
|
||||
f"Provider={provider.name}, Endpoint={endpoint.id[:8]}..., "
|
||||
f"base_url={endpoint.base_url}"
|
||||
)
|
||||
raise ProviderNotAvailableException(
|
||||
f"提供商 '{provider.name}' 返回了 HTML 页面而非 API 响应,"
|
||||
f"请检查 endpoint 的 base_url 配置是否正确"
|
||||
)
|
||||
|
||||
# 跳过空行和注释行
|
||||
if not line or line.startswith(":"):
|
||||
if line_count >= max_prefetch_lines:
|
||||
should_stop = True
|
||||
break
|
||||
continue
|
||||
|
||||
@@ -248,7 +274,6 @@ class StreamProcessor:
|
||||
data = json.loads(data_str)
|
||||
except json.JSONDecodeError:
|
||||
if line_count >= max_prefetch_lines:
|
||||
should_stop = True
|
||||
break
|
||||
continue
|
||||
|
||||
@@ -276,14 +301,34 @@ class StreamProcessor:
|
||||
should_stop = True
|
||||
break
|
||||
|
||||
# 达到预读字节上限,停止继续预读(避免无换行响应导致内存增长)
|
||||
if not should_stop and total_prefetched_bytes >= max_prefetch_bytes:
|
||||
logger.debug(
|
||||
f" [{self.request_id}] 预读达到字节上限,停止继续预读: "
|
||||
f"Provider={provider.name}, bytes={total_prefetched_bytes}, "
|
||||
f"max_bytes={max_prefetch_bytes}"
|
||||
)
|
||||
break
|
||||
|
||||
if should_stop or line_count >= max_prefetch_lines:
|
||||
break
|
||||
|
||||
except (EmbeddedErrorException, ProviderTimeoutException):
|
||||
# 预读结束后,检查是否为非 SSE 格式的 HTML/JSON 响应
|
||||
if not should_stop and prefetched_chunks:
|
||||
check_prefetched_response_error(
|
||||
prefetched_chunks=prefetched_chunks,
|
||||
parser=parser,
|
||||
request_id=self.request_id,
|
||||
provider_name=str(provider.name),
|
||||
endpoint_id=endpoint.id,
|
||||
base_url=endpoint.base_url,
|
||||
)
|
||||
|
||||
except (EmbeddedErrorException, ProviderNotAvailableException, ProviderTimeoutException):
|
||||
# 重新抛出可重试的 Provider 异常,触发故障转移
|
||||
raise
|
||||
except (OSError, IOError) as e:
|
||||
# 网络 I/O <EFBFBD><EFBFBD><EFBFBD>常:记录警告,可能需要重试
|
||||
# 网络 I/O 异常:记录警告,可能需要重试
|
||||
logger.warning(
|
||||
f" [{self.request_id}] 预读流时发生网络异常: {type(e).__name__}: {e}"
|
||||
)
|
||||
|
||||
@@ -2,8 +2,10 @@
|
||||
Handler 基础工具函数
|
||||
"""
|
||||
|
||||
import json
|
||||
from typing import Any, Dict, Optional
|
||||
|
||||
from src.core.exceptions import EmbeddedErrorException, ProviderNotAvailableException
|
||||
from src.core.logger import logger
|
||||
|
||||
|
||||
@@ -107,3 +109,95 @@ def build_sse_headers(extra_headers: Optional[Dict[str, str]] = None) -> Dict[st
|
||||
if extra_headers:
|
||||
headers.update(extra_headers)
|
||||
return headers
|
||||
|
||||
|
||||
def check_html_response(line: str) -> bool:
    """
    Report whether a decoded response line looks like the start of an HTML page.

    Receiving an HTML page where an API payload was expected is a common
    symptom of a misconfigured endpoint ``base_url``.

    Leading whitespace and a leading byte order mark (U+FEFF, which is what a
    UTF-8 BOM decodes to) are ignored, and matching is case-insensitive.

    Args:
        line: A single decoded line of the response body.

    Returns:
        True if the line begins with ``<!doctype`` or ``<html``, else False.
    """
    # str.lstrip() with no argument does not treat U+FEFF as whitespace, so the
    # BOM is removed explicitly, then whitespace that followed it is stripped.
    candidate = line.lstrip().lstrip("\ufeff").lstrip().lower()
    return candidate.startswith(("<!doctype", "<html"))
|
||||
|
||||
|
||||
def check_prefetched_response_error(
    prefetched_chunks: list,
    parser: Any,
    request_id: str,
    provider_name: str,
    endpoint_id: Optional[str],
    base_url: Optional[str],
) -> None:
    """
    Inspect prefetched stream bytes for a non-SSE error response.

    Two shapes are recognised after joining the chunks and skipping leading
    whitespace and a UTF-8 BOM:

    1. An HTML page (body starts with ``<!doctype`` or ``<html``), which
       usually means the endpoint ``base_url`` is misconfigured.
    2. A plain JSON document (body starts with ``{`` or ``[``), possibly
       without newlines or spread over several lines. It is reported only
       when it decodes to a dict that ``parser.is_error_response`` accepts.

    Anything else, including JSON that fails to decode (for example because
    the prefetch stopped mid-document), is ignored and the function returns
    normally.

    Args:
        prefetched_chunks: Byte chunks read during prefetch, in arrival order.
        parser: Response parser; must provide ``is_error_response(data)`` and
            ``parse_response(data, status)``, the latter returning an object
            with ``error_type`` and ``error_message`` attributes.
        request_id: Request ID, used in log messages only.
        provider_name: Provider name, used in logs and in raised exceptions.
        endpoint_id: Endpoint ID; only its first 8 characters are logged.
        base_url: Endpoint base URL, used in log messages only.

    Raises:
        ProviderNotAvailableException: If the body looks like an HTML page.
        EmbeddedErrorException: If the body is a JSON error response.
    """
    if not prefetched_chunks:
        return

    utf8_bom = b"\xef\xbb\xbf"
    payload = b"".join(prefetched_chunks).lstrip()
    if payload.startswith(utf8_bom):
        # bytes.lstrip() does not remove the BOM, so whitespace sitting after
        # it is only reachable once the BOM itself has been dropped.
        payload = payload[len(utf8_bom):].lstrip()

    # HTML page: only a short prefix needs lower-casing for the comparison.
    if payload[:32].lower().startswith((b"<!doctype", b"<html")):
        endpoint_short = endpoint_id[:8] + "..." if endpoint_id else "N/A"
        logger.error(
            f" [{request_id}] 检测到 HTML 响应,可能是 base_url 配置错误: "
            f"Provider={provider_name}, Endpoint={endpoint_short}, "
            f"base_url={base_url}"
        )
        raise ProviderNotAvailableException(
            f"提供商 '{provider_name}' 返回了 HTML 页面而非 API 响应,"
            f"请检查 endpoint 的 base_url 配置是否正确"
        )

    # Plain JSON (no SSE framing); anything else is not our concern here.
    if not payload.startswith((b"{", b"[")):
        return

    try:
        data = json.loads(payload.decode("utf-8", errors="replace").strip())
    except json.JSONDecodeError:
        # Best effort: a truncated or malformed document is not treated as an
        # error response.
        return

    if not (isinstance(data, dict) and parser.is_error_response(data)):
        return

    parsed = parser.parse_response(data, 200)
    logger.warning(
        f" [{request_id}] 检测到 JSON 错误响应: "
        f"Provider={provider_name}, "
        f"error_type={parsed.error_type}, "
        f"message={parsed.error_message}"
    )
    raise EmbeddedErrorException(
        provider_name=provider_name,
        # A purely numeric error_type is also exposed as an integer code.
        error_code=(
            int(parsed.error_type)
            if parsed.error_type and parsed.error_type.isdigit()
            else None
        ),
        error_message=parsed.error_message,
        error_status=parsed.error_type,
    )
|
||||
|
||||
Reference in New Issue
Block a user