mirror of
https://github.com/fawney19/Aether.git
synced 2026-01-03 16:22:27 +08:00
feat: add TTFB timeout detection and improve stream handling
- Add stream first byte timeout (TTFB) detection to trigger failover when provider responds too slowly (configurable via STREAM_FIRST_BYTE_TIMEOUT)
- Add rate limit fail-open/fail-close strategy configuration
- Improve exception handling in stream prefetch with proper error classification
- Refactor UsageService with shared _prepare_usage_record method
- Add batch deletion for old usage records to avoid long transaction locks
- Update CLI adapters to use proper User-Agent headers for each CLI client
- Add composite indexes migration for usage table query optimization
- Fix streaming status display in frontend to show TTFB during streaming
- Remove sensitive JWT secret logging in auth service
This commit is contained in:
@@ -376,6 +376,9 @@ class BaseMessageHandler:
|
||||
|
||||
使用 asyncio 后台任务执行数据库更新,避免阻塞流式传输
|
||||
|
||||
注意:TTFB(首字节时间)由 StreamContext.record_first_byte_time() 记录,
|
||||
并在最终 record_success 时传递到数据库,避免重复记录导致数据不一致。
|
||||
|
||||
Args:
|
||||
request_id: 请求 ID,如果不传则使用 self.request_id
|
||||
"""
|
||||
@@ -407,6 +410,9 @@ class BaseMessageHandler:
|
||||
|
||||
使用 asyncio 后台任务执行数据库更新,避免阻塞流式传输
|
||||
|
||||
注意:TTFB(首字节时间)由 StreamContext.record_first_byte_time() 记录,
|
||||
并在最终 record_success 时传递到数据库,避免重复记录导致数据不一致。
|
||||
|
||||
Args:
|
||||
ctx: 流式上下文,包含 provider_name 和 mapped_model
|
||||
"""
|
||||
|
||||
@@ -57,8 +57,10 @@ from src.models.database import (
|
||||
ProviderEndpoint,
|
||||
User,
|
||||
)
|
||||
from src.config.settings import config
|
||||
from src.services.provider.transport import build_provider_url
|
||||
from src.utils.sse_parser import SSEEventParser
|
||||
from src.utils.timeout import read_first_chunk_with_ttfb_timeout
|
||||
|
||||
|
||||
class CliMessageHandlerBase(BaseMessageHandler):
|
||||
@@ -672,6 +674,8 @@ class CliMessageHandlerBase(BaseMessageHandler):
|
||||
|
||||
同时检测 HTML 响应(通常是 base_url 配置错误导致返回网页)。
|
||||
|
||||
首次读取时会应用 TTFB(首字节超时)检测,超时则触发故障转移。
|
||||
|
||||
Args:
|
||||
byte_iterator: 字节流迭代器
|
||||
provider: Provider 对象
|
||||
@@ -684,6 +688,7 @@ class CliMessageHandlerBase(BaseMessageHandler):
|
||||
Raises:
|
||||
EmbeddedErrorException: 如果检测到嵌套错误
|
||||
ProviderNotAvailableException: 如果检测到 HTML 响应(配置错误)
|
||||
ProviderTimeoutException: 如果首字节超时(TTFB timeout)
|
||||
"""
|
||||
prefetched_chunks: list = []
|
||||
max_prefetch_lines = 5 # 最多预读5行来检测错误
|
||||
@@ -704,7 +709,19 @@ class CliMessageHandlerBase(BaseMessageHandler):
|
||||
else:
|
||||
provider_parser = self.parser
|
||||
|
||||
async for chunk in byte_iterator:
|
||||
# 使用共享的 TTFB 超时函数读取首字节
|
||||
ttfb_timeout = config.stream_first_byte_timeout
|
||||
first_chunk, aiter = await read_first_chunk_with_ttfb_timeout(
|
||||
byte_iterator,
|
||||
timeout=ttfb_timeout,
|
||||
request_id=self.request_id,
|
||||
provider_name=str(provider.name),
|
||||
)
|
||||
prefetched_chunks.append(first_chunk)
|
||||
buffer += first_chunk
|
||||
|
||||
# 继续读取剩余的预读数据
|
||||
async for chunk in aiter:
|
||||
prefetched_chunks.append(chunk)
|
||||
buffer += chunk
|
||||
|
||||
@@ -785,12 +802,21 @@ class CliMessageHandlerBase(BaseMessageHandler):
|
||||
if should_stop or line_count >= max_prefetch_lines:
|
||||
break
|
||||
|
||||
except EmbeddedErrorException:
|
||||
# 重新抛出嵌套错误
|
||||
except (EmbeddedErrorException, ProviderTimeoutException, ProviderNotAvailableException):
|
||||
# 重新抛出可重试的 Provider 异常,触发故障转移
|
||||
raise
|
||||
except (OSError, IOError) as e:
|
||||
# 网络 I/O 异常:记录警告,可能需要重试
|
||||
logger.warning(
|
||||
f" [{self.request_id}] 预读流时发生网络异常: {type(e).__name__}: {e}"
|
||||
)
|
||||
except Exception as e:
|
||||
# 其他异常(如网络错误)在预读阶段发生,记录日志但不中断
|
||||
logger.debug(f" [{self.request_id}] 预读流时发生异常: {e}")
|
||||
# 未预期的严重异常:记录错误并重新抛出,避免掩盖问题
|
||||
logger.error(
|
||||
f" [{self.request_id}] 预读流时发生严重异常: {type(e).__name__}: {e}",
|
||||
exc_info=True
|
||||
)
|
||||
raise
|
||||
|
||||
return prefetched_chunks
|
||||
|
||||
|
||||
@@ -25,10 +25,12 @@ from src.api.handlers.base.content_extractors import (
|
||||
from src.api.handlers.base.parsers import get_parser_for_format
|
||||
from src.api.handlers.base.response_parser import ResponseParser
|
||||
from src.api.handlers.base.stream_context import StreamContext
|
||||
from src.core.exceptions import EmbeddedErrorException
|
||||
from src.config.settings import config
|
||||
from src.core.exceptions import EmbeddedErrorException, ProviderTimeoutException
|
||||
from src.core.logger import logger
|
||||
from src.models.database import Provider, ProviderEndpoint
|
||||
from src.utils.sse_parser import SSEEventParser
|
||||
from src.utils.timeout import read_first_chunk_with_ttfb_timeout
|
||||
|
||||
|
||||
@dataclass
|
||||
@@ -170,6 +172,8 @@ class StreamProcessor:
|
||||
某些 Provider(如 Gemini)可能返回 HTTP 200,但在响应体中包含错误信息。
|
||||
这种情况需要在流开始输出之前检测,以便触发重试逻辑。
|
||||
|
||||
首次读取时会应用 TTFB(首字节超时)检测,超时则触发故障转移。
|
||||
|
||||
Args:
|
||||
byte_iterator: 字节流迭代器
|
||||
provider: Provider 对象
|
||||
@@ -182,6 +186,7 @@ class StreamProcessor:
|
||||
|
||||
Raises:
|
||||
EmbeddedErrorException: 如果检测到嵌套错误
|
||||
ProviderTimeoutException: 如果首字节超时(TTFB timeout)
|
||||
"""
|
||||
prefetched_chunks: list = []
|
||||
parser = self.get_parser_for_provider(ctx)
|
||||
@@ -192,7 +197,19 @@ class StreamProcessor:
|
||||
decoder = codecs.getincrementaldecoder("utf-8")(errors="replace")
|
||||
|
||||
try:
|
||||
async for chunk in byte_iterator:
|
||||
# 使用共享的 TTFB 超时函数读取首字节
|
||||
ttfb_timeout = config.stream_first_byte_timeout
|
||||
first_chunk, aiter = await read_first_chunk_with_ttfb_timeout(
|
||||
byte_iterator,
|
||||
timeout=ttfb_timeout,
|
||||
request_id=self.request_id,
|
||||
provider_name=str(provider.name),
|
||||
)
|
||||
prefetched_chunks.append(first_chunk)
|
||||
buffer += first_chunk
|
||||
|
||||
# 继续读取剩余的预读数据
|
||||
async for chunk in aiter:
|
||||
prefetched_chunks.append(chunk)
|
||||
buffer += chunk
|
||||
|
||||
@@ -262,10 +279,21 @@ class StreamProcessor:
|
||||
if should_stop or line_count >= max_prefetch_lines:
|
||||
break
|
||||
|
||||
except EmbeddedErrorException:
|
||||
except (EmbeddedErrorException, ProviderTimeoutException):
|
||||
# 重新抛出可重试的 Provider 异常,触发故障转移
|
||||
raise
|
||||
except (OSError, IOError) as e:
|
||||
# 网络 I/O 异常:记录警告,可能需要重试
|
||||
logger.warning(
|
||||
f" [{self.request_id}] 预读流时发生网络异常: {type(e).__name__}: {e}"
|
||||
)
|
||||
except Exception as e:
|
||||
logger.debug(f" [{self.request_id}] 预读流时发生异常: {e}")
|
||||
# 未预期的严重异常:记录错误并重新抛出,避免掩盖问题
|
||||
logger.error(
|
||||
f" [{self.request_id}] 预读流时发生严重异常: {type(e).__name__}: {e}",
|
||||
exc_info=True
|
||||
)
|
||||
raise
|
||||
|
||||
return prefetched_chunks
|
||||
|
||||
|
||||
Reference in New Issue
Block a user