Mirror of https://github.com/fawney19/Aether.git (synced 2026-01-05 09:12:27 +08:00)
refactor(handler): optimize stream processing and telemetry pipeline
- Enhance stream context for better token and latency tracking
- Refactor stream processor for improved performance metrics
- Improve telemetry integration with first_byte_time_ms support
- Add comprehensive stream context unit tests
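The diff below leans on StreamContext helpers (record_first_byte_time, append_text, chunk_count) that this commit does not show. As a rough sketch of the contract those calls appear to assume — everything beyond the names actually used in the diff, including the clock choice and the first_byte_time_ms field layout, is a guess for illustration, not Aether's actual class:

import time
from dataclasses import dataclass, field
from typing import Optional


@dataclass
class StreamContextSketch:
    """Hypothetical stand-in for Aether's StreamContext (illustration only)."""

    chunk_count: int = 0
    status_code: int = 200
    error_message: Optional[str] = None
    first_byte_time_ms: Optional[float] = None  # surfaced to telemetry per the commit message
    _text_parts: list = field(default_factory=list)

    def record_first_byte_time(self, start_time: float) -> None:
        # Assumes start_time was taken from the same monotonic clock.
        if self.first_byte_time_ms is None:
            self.first_byte_time_ms = (time.monotonic() - start_time) * 1000.0

    def append_text(self, text: str) -> None:
        # Collected only when the processor was built with collect_text=True.
        self._text_parts.append(text)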
@@ -9,7 +9,9 @@
 """
 
 import asyncio
+import codecs
 import json
+import time
 from typing import Any, AsyncGenerator, Callable, Optional
 
 import httpx
@@ -36,6 +38,8 @@ class StreamProcessor:
         request_id: str,
         default_parser: ResponseParser,
         on_streaming_start: Optional[Callable[[], None]] = None,
+        *,
+        collect_text: bool = False,
     ):
         """
         Initialize the stream processor
@@ -48,6 +52,7 @@ class StreamProcessor:
         self.request_id = request_id
         self.default_parser = default_parser
         self.on_streaming_start = on_streaming_start
+        self.collect_text = collect_text
 
     def get_parser_for_provider(self, ctx: StreamContext) -> ResponseParser:
         """
@@ -112,9 +117,10 @@ class StreamProcessor:
             )
 
             # Extract text
-            text = parser.extract_text_content(data)
-            if text:
-                ctx.collected_text += text
+            if self.collect_text:
+                text = parser.extract_text_content(data)
+                if text:
+                    ctx.append_text(text)
 
             # Check for completion
             event_type = event_name or data.get("type", "")
@@ -123,7 +129,7 @@ class StreamProcessor:
 
     async def prefetch_and_check_error(
         self,
-        line_iterator: Any,
+        byte_iterator: Any,
         provider: Provider,
         endpoint: ProviderEndpoint,
         ctx: StreamContext,
@@ -136,97 +142,126 @@ class StreamProcessor:
         This case must be detected before the stream starts emitting output, so that retry logic can be triggered.
 
         Args:
-            line_iterator: line iterator
+            byte_iterator: byte-stream iterator
             provider: Provider object
             endpoint: Endpoint object
             ctx: streaming context
             max_prefetch_lines: maximum number of lines to prefetch
 
         Returns:
-            List of prefetched lines
+            List of prefetched byte chunks
 
         Raises:
            EmbeddedErrorException: if an embedded error is detected
         """
-        prefetched_lines: list = []
+        prefetched_chunks: list = []
         parser = self.get_parser_for_provider(ctx)
+        buffer = b""
+        line_count = 0
+        should_stop = False
+        # Use an incremental decoder for UTF-8 characters that span chunks
+        decoder = codecs.getincrementaldecoder("utf-8")(errors="replace")
 
         try:
-            line_count = 0
-            async for line in line_iterator:
-                prefetched_lines.append(line)
-                line_count += 1
-
-                normalized_line = line.rstrip("\r")
-                if not normalized_line or normalized_line.startswith(":"):
-                    if line_count >= max_prefetch_lines:
-                        break
-                    continue
-
-                # Try to parse SSE data
-                data_str = normalized_line
-                if normalized_line.startswith("data: "):
-                    data_str = normalized_line[6:]
-
-                if data_str == "[DONE]":
-                    break
-
-                try:
-                    data = json.loads(data_str)
-                except json.JSONDecodeError:
-                    if line_count >= max_prefetch_lines:
-                        break
-                    continue
-
-                # Use the parser to check for an error response
-                if isinstance(data, dict) and parser.is_error_response(data):
-                    parsed = parser.parse_response(data, 200)
-                    logger.warning(
-                        f" [{self.request_id}] Embedded error detected: "
-                        f"Provider={provider.name}, "
-                        f"error_type={parsed.error_type}, "
-                        f"message={parsed.error_message}"
-                    )
-                    raise EmbeddedErrorException(
-                        provider_name=str(provider.name),
-                        error_code=(
-                            int(parsed.error_type)
-                            if parsed.error_type and parsed.error_type.isdigit()
-                            else None
-                        ),
-                        error_message=parsed.error_message,
-                        error_status=parsed.error_type,
-                    )
-
-                # Valid data prefetched with no error; stop prefetching
-                break
+            async for chunk in byte_iterator:
+                prefetched_chunks.append(chunk)
+                buffer += chunk
+
+                # Try to parse the buffer line by line
+                while b"\n" in buffer:
+                    line_bytes, buffer = buffer.split(b"\n", 1)
+                    try:
+                        # The incremental decoder correctly handles multi-byte characters split across chunks
+                        line = decoder.decode(line_bytes + b"\n", False).rstrip("\r\n")
+                    except Exception as e:
+                        logger.warning(
+                            f"[{self.request_id}] UTF-8 decoding failed during prefetch: {e}, "
+                            f"bytes={line_bytes[:50]!r}"
+                        )
+                        continue
+
+                    line_count += 1
+
+                    # Skip empty lines and comment lines
+                    if not line or line.startswith(":"):
+                        if line_count >= max_prefetch_lines:
+                            should_stop = True
+                            break
+                        continue
+
+                    # Try to parse SSE data
+                    data_str = line
+                    if line.startswith("data: "):
+                        data_str = line[6:]
+
+                    if data_str == "[DONE]":
+                        should_stop = True
+                        break
+
+                    try:
+                        data = json.loads(data_str)
+                    except json.JSONDecodeError:
+                        if line_count >= max_prefetch_lines:
+                            should_stop = True
+                            break
+                        continue
+
+                    # Use the parser to check for an error response
+                    if isinstance(data, dict) and parser.is_error_response(data):
+                        parsed = parser.parse_response(data, 200)
+                        logger.warning(
+                            f" [{self.request_id}] Embedded error detected: "
+                            f"Provider={provider.name}, "
+                            f"error_type={parsed.error_type}, "
+                            f"message={parsed.error_message}"
+                        )
+                        raise EmbeddedErrorException(
+                            provider_name=str(provider.name),
+                            error_code=(
+                                int(parsed.error_type)
+                                if parsed.error_type and parsed.error_type.isdigit()
+                                else None
+                            ),
+                            error_message=parsed.error_message,
+                            error_status=parsed.error_type,
+                        )
+
+                    # Valid data prefetched with no error; stop prefetching
+                    should_stop = True
+                    break
+
+                if should_stop or line_count >= max_prefetch_lines:
+                    break
 
         except EmbeddedErrorException:
             raise
         except Exception as e:
             logger.debug(f" [{self.request_id}] Exception while prefetching the stream: {e}")
 
-        return prefetched_lines
+        return prefetched_chunks
 
     async def create_response_stream(
         self,
         ctx: StreamContext,
-        line_iterator: Any,
+        byte_iterator: Any,
         response_ctx: Any,
         http_client: httpx.AsyncClient,
-        prefetched_lines: Optional[list] = None,
+        prefetched_chunks: Optional[list] = None,
+        *,
+        start_time: Optional[float] = None,
     ) -> AsyncGenerator[bytes, None]:
         """
         Create the response stream generator
 
-        A unified stream generator supporting both the prefetched and non-prefetched cases.
+        Parses SSE data out of the byte stream and forwards it; supports prefetched data.
 
         Args:
             ctx: streaming context
-            line_iterator: line iterator
+            byte_iterator: byte-stream iterator
             response_ctx: HTTP response context manager
             http_client: HTTP client
-            prefetched_lines: list of prefetched lines (optional)
+            prefetched_chunks: list of prefetched byte chunks (optional)
+            start_time: request start time, used to compute TTFB (optional)
 
         Yields:
             Encoded response data chunks
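Why the prefetch path now consumes raw bytes with an incremental decoder: decoding chunk by chunk with bytes.decode() corrupts any multi-byte UTF-8 character that happens to straddle a chunk boundary, while codecs' incremental decoder buffers the partial sequence between calls. A standalone stdlib illustration (not code from this repo):

import codecs

# The 3-byte UTF-8 character "中" (b"\xe4\xb8\xad") split across two chunks.
chunks = [b"data: \xe4\xb8", b"\xad\n"]

# Naive per-chunk decoding loses the split character:
print(chunks[0].decode("utf-8", errors="replace"))  # mojibake: trailing U+FFFD

# The incremental decoder carries the partial sequence across calls:
decoder = codecs.getincrementaldecoder("utf-8")(errors="replace")
text = "".join(decoder.decode(c, False) for c in chunks) + decoder.decode(b"", True)
print(text)  # data: 中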
@@ -234,25 +269,82 @@ class StreamProcessor:
         try:
             sse_parser = SSEEventParser()
             streaming_started = False
+            buffer = b""
+            # Use an incremental decoder for UTF-8 characters that span chunks
+            decoder = codecs.getincrementaldecoder("utf-8")(errors="replace")
 
             # Handle prefetched data
-            if prefetched_lines:
+            if prefetched_chunks:
                 if not streaming_started and self.on_streaming_start:
                     self.on_streaming_start()
                     streaming_started = True
 
-                for line in prefetched_lines:
-                    for chunk in self._process_line(ctx, sse_parser, line):
-                        yield chunk
+                for chunk in prefetched_chunks:
+                    # Record time to first byte (TTFB) - before yielding
+                    if start_time is not None:
+                        ctx.record_first_byte_time(start_time)
+                        start_time = None  # record only once
+
+                    # Forward the raw data to the client
+                    yield chunk
+
+                    buffer += chunk
+                    # Process complete lines in the buffer
+                    while b"\n" in buffer:
+                        line_bytes, buffer = buffer.split(b"\n", 1)
+                        try:
+                            # The incremental decoder correctly handles multi-byte characters split across chunks
+                            line = decoder.decode(line_bytes + b"\n", False)
+                            self._process_line(ctx, sse_parser, line)
+                        except Exception as e:
+                            # Decoding failed; log a warning and keep going
+                            logger.warning(
+                                f"[{self.request_id}] UTF-8 decoding failed: {e}, "
+                                f"bytes={line_bytes[:50]!r}"
+                            )
+                            continue
 
             # Handle the rest of the stream
-            async for line in line_iterator:
+            async for chunk in byte_iterator:
                 if not streaming_started and self.on_streaming_start:
                     self.on_streaming_start()
                     streaming_started = True
 
-                for chunk in self._process_line(ctx, sse_parser, line):
-                    yield chunk
+                # Record time to first byte (TTFB) - before yielding (if there was no prefetched data)
+                if start_time is not None:
+                    ctx.record_first_byte_time(start_time)
+                    start_time = None  # record only once
+
+                # Pass the raw data through
+                yield chunk
+
+                buffer += chunk
+                # Process complete lines in the buffer
+                while b"\n" in buffer:
+                    line_bytes, buffer = buffer.split(b"\n", 1)
+                    try:
+                        # The incremental decoder correctly handles multi-byte characters split across chunks
+                        line = decoder.decode(line_bytes + b"\n", False)
+                        self._process_line(ctx, sse_parser, line)
+                    except Exception as e:
+                        # Decoding failed; log a warning and keep going
+                        logger.warning(
+                            f"[{self.request_id}] UTF-8 decoding failed: {e}, "
+                            f"bytes={line_bytes[:50]!r}"
+                        )
+                        continue
+
+            # Process any remaining buffered data (an unterminated final line)
+            if buffer:
+                try:
+                    # final=True flushes any trailing incomplete character
+                    line = decoder.decode(buffer, True)
+                    self._process_line(ctx, sse_parser, line)
+                except Exception as e:
+                    logger.warning(
+                        f"[{self.request_id}] Failed to process the remaining buffer: {e}, "
+                        f"bytes={buffer[:50]!r}"
+                    )
 
             # Process remaining events
             for event in sse_parser.flush():
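A structural shift in this hunk is worth calling out: the old generator re-encoded and emitted whatever _process_line returned, whereas the new one forwards upstream bytes verbatim and parses a buffered copy purely for telemetry, so proxying can no longer perturb the wire format. A minimal sketch of that pass-through-plus-observer pattern — the function and callback names are invented for illustration:

import codecs
from typing import AsyncGenerator, AsyncIterator, Callable


async def passthrough_with_observer(
    byte_iterator: AsyncIterator[bytes],
    observe_line: Callable[[str], None],
) -> AsyncGenerator[bytes, None]:
    """Forward raw bytes unchanged; feed decoded lines to an observer on the side."""
    decoder = codecs.getincrementaldecoder("utf-8")(errors="replace")
    buffer = b""
    async for chunk in byte_iterator:
        yield chunk  # the client sees exactly what upstream sent
        buffer += chunk
        while b"\n" in buffer:
            line_bytes, buffer = buffer.split(b"\n", 1)
            observe_line(decoder.decode(line_bytes + b"\n", False))
    if buffer:  # flush an unterminated final line
        observe_line(decoder.decode(buffer, True))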
@@ -268,7 +360,7 @@ class StreamProcessor:
         ctx: StreamContext,
         sse_parser: SSEEventParser,
         line: str,
-    ) -> list[bytes]:
+    ) -> None:
         """
         Process a single line of data
 
@@ -276,26 +368,17 @@ class StreamProcessor:
             ctx: streaming context
             sse_parser: SSE parser
             line: raw line data
-
-        Returns:
-            List of data chunks to send
         """
-        result: list[bytes] = []
-        normalized_line = line.rstrip("\r")
+        # SSEEventParser expects newline-stripped single lines; strip CR/LF uniformly here
+        # so an empty line is not mistaken for "\n", which would break event-boundary parsing.
+        normalized_line = line.rstrip("\r\n")
         events = sse_parser.feed_line(normalized_line)
 
-        if normalized_line == "":
-            for event in events:
-                self.handle_sse_event(ctx, event.get("event"), event.get("data") or "")
-            result.append(b"\n")
-        else:
+        if normalized_line != "":
             ctx.chunk_count += 1
-            result.append((line + "\n").encode("utf-8"))
-
-            for event in events:
-                self.handle_sse_event(ctx, event.get("event"), event.get("data") or "")
 
-        return result
+        for event in events:
+            self.handle_sse_event(ctx, event.get("event"), event.get("data") or "")
 
     async def create_monitored_stream(
         self,
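_process_line now returns None because forwarding happens at the byte layer; its only remaining job is to feed the SSE parser, for which a blank line marks an event boundary. The CR/LF-stripping comment can be demonstrated with a toy parser that mimics the feed_line/flush shape used above (the toy is illustrative, not Aether's SSEEventParser):

class ToySSEParser:
    """Toy SSE accumulator: collects event:/data: fields; a blank line closes the event."""

    def __init__(self) -> None:
        self._event: dict = {}

    def feed_line(self, line: str) -> list[dict]:
        if line == "":  # blank line = event boundary
            done, self._event = ([self._event] if self._event else []), {}
            return done
        if line.startswith("event: "):
            self._event["event"] = line[7:]
        elif line.startswith("data: "):
            self._event["data"] = self._event.get("data", "") + line[6:]
        return []

    def flush(self) -> list[dict]:
        done, self._event = ([self._event] if self._event else []), {}
        return done


# With CRLF framing, a "blank" line arrives as "\r\n"; without rstrip("\r\n") it would
# never compare equal to "" and the event would never close.
p = ToySSEParser()
for raw in ["event: message\r\n", 'data: {"x": 1}\r\n', "\r\n"]:
    print(p.feed_line(raw.rstrip("\r\n")))
# [], [], [{'event': 'message', 'data': '{"x": 1}'}]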
@@ -317,16 +400,26 @@ class StreamProcessor:
             Response data chunks
         """
         try:
+            # Disconnect-check frequency: every await adds scheduling overhead, and checking
+            # too often makes the stream emit in stop-and-go bursts. Throttle the check by
+            # time interval to balance stopping upstream reads promptly against smooth throughput.
+            next_disconnect_check_at = 0.0
+            disconnect_check_interval_s = 0.25
+
             async for chunk in stream_generator:
-                if await is_disconnected():
-                    logger.warning(f"ID:{self.request_id} | Client disconnected")
-                    ctx.status_code = 499  # Client Closed Request
-                    ctx.error_message = "client_disconnected"
-                    break
+                now = time.monotonic()
+                if now >= next_disconnect_check_at:
+                    next_disconnect_check_at = now + disconnect_check_interval_s
+                    if await is_disconnected():
+                        logger.warning(f"ID:{self.request_id} | Client disconnected")
+                        ctx.status_code = 499  # Client Closed Request
+                        ctx.error_message = "client_disconnected"
+
+                        break
                 yield chunk
         except asyncio.CancelledError:
             ctx.status_code = 499
             ctx.error_message = "client_disconnected"
 
             raise
         except Exception as e:
             ctx.status_code = 500
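The throttling added here trades disconnect-detection latency (up to ~250 ms) for smoother throughput, since awaiting is_disconnected() on every chunk adds scheduler overhead. The pattern in isolation, as a generic sketch rather than the full create_monitored_stream:

import time
from typing import AsyncGenerator, AsyncIterator, Awaitable, Callable


async def throttled_guard(
    chunks: AsyncIterator[bytes],
    is_disconnected: Callable[[], Awaitable[bool]],
    interval_s: float = 0.25,
) -> AsyncGenerator[bytes, None]:
    next_check_at = 0.0  # 0.0 guarantees a check on the first chunk
    async for chunk in chunks:
        now = time.monotonic()
        if now >= next_check_at:
            next_check_at = now + interval_s
            if await is_disconnected():
                break  # stop reading from upstream promptly
        yield chunk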