# Mirror of https://github.com/fawney19/Aether.git
# Synced 2026-01-08 10:42:29 +08:00
# 182 lines, 4.3 KiB, Python
|
|
"""
OpenAI SSE stream parser.

Parses the Server-Sent Events (SSE) stream of the OpenAI Chat Completions API.
"""
|
|||
|
|
|
|||
|
|
from __future__ import annotations
|
|||
|
|
|
|||
|
|
import json
|
|||
|
|
from typing import Any, Dict, List, Optional
|
|||
|
|
|
|||
|
|
|
|||
|
|
class OpenAIStreamParser:
|
|||
|
|
"""
|
|||
|
|
OpenAI SSE 流解析器
|
|||
|
|
|
|||
|
|
解析 OpenAI Chat Completions API 的 SSE 事件流。
|
|||
|
|
|
|||
|
|
OpenAI 流格式:
|
|||
|
|
- 每个 chunk 是一个 JSON 对象,包含 choices 数组
|
|||
|
|
- choices[0].delta 包含增量内容
|
|||
|
|
- choices[0].finish_reason 表示结束原因
|
|||
|
|
- 流结束时发送 data: [DONE]
|
|||
|
|
"""
|
|||
|
|
|
|||
|
|
def parse_chunk(self, chunk: bytes | str) -> List[Dict[str, Any]]:
|
|||
|
|
"""
|
|||
|
|
解析 SSE 数据块
|
|||
|
|
|
|||
|
|
Args:
|
|||
|
|
chunk: 原始 SSE 数据(bytes 或 str)
|
|||
|
|
|
|||
|
|
Returns:
|
|||
|
|
解析后的 chunk 列表
|
|||
|
|
"""
|
|||
|
|
if isinstance(chunk, bytes):
|
|||
|
|
text = chunk.decode("utf-8")
|
|||
|
|
else:
|
|||
|
|
text = chunk
|
|||
|
|
|
|||
|
|
chunks: List[Dict[str, Any]] = []
|
|||
|
|
lines = text.strip().split("\n")
|
|||
|
|
|
|||
|
|
for line in lines:
|
|||
|
|
line = line.strip()
|
|||
|
|
if not line:
|
|||
|
|
continue
|
|||
|
|
|
|||
|
|
# 解析数据行
|
|||
|
|
if line.startswith("data: "):
|
|||
|
|
data_str = line[6:]
|
|||
|
|
|
|||
|
|
# 处理 [DONE] 标记
|
|||
|
|
if data_str == "[DONE]":
|
|||
|
|
chunks.append({"__done__": True})
|
|||
|
|
continue
|
|||
|
|
|
|||
|
|
try:
|
|||
|
|
data = json.loads(data_str)
|
|||
|
|
chunks.append(data)
|
|||
|
|
except json.JSONDecodeError:
|
|||
|
|
# 无法解析的数据,跳过
|
|||
|
|
pass
|
|||
|
|
|
|||
|
|
return chunks
|
|||
|
|
|
|||
|
|
def parse_line(self, line: str) -> Optional[Dict[str, Any]]:
|
|||
|
|
"""
|
|||
|
|
解析单行 SSE 数据
|
|||
|
|
|
|||
|
|
Args:
|
|||
|
|
line: SSE 数据行(已去除 "data: " 前缀)
|
|||
|
|
|
|||
|
|
Returns:
|
|||
|
|
解析后的 chunk 字典,如果无法解析返回 None
|
|||
|
|
"""
|
|||
|
|
if not line or line == "[DONE]":
|
|||
|
|
return None
|
|||
|
|
|
|||
|
|
try:
|
|||
|
|
return json.loads(line)
|
|||
|
|
except json.JSONDecodeError:
|
|||
|
|
return None
|
|||
|
|
|
|||
|
|
def is_done_chunk(self, chunk: Dict[str, Any]) -> bool:
|
|||
|
|
"""
|
|||
|
|
判断是否为结束 chunk
|
|||
|
|
|
|||
|
|
Args:
|
|||
|
|
chunk: chunk 字典
|
|||
|
|
|
|||
|
|
Returns:
|
|||
|
|
True 如果是结束 chunk
|
|||
|
|
"""
|
|||
|
|
# 内部标记
|
|||
|
|
if chunk.get("__done__"):
|
|||
|
|
return True
|
|||
|
|
|
|||
|
|
# 检查 finish_reason
|
|||
|
|
choices = chunk.get("choices", [])
|
|||
|
|
if choices:
|
|||
|
|
finish_reason = choices[0].get("finish_reason")
|
|||
|
|
return finish_reason is not None
|
|||
|
|
|
|||
|
|
return False
|
|||
|
|
|
|||
|
|
def get_finish_reason(self, chunk: Dict[str, Any]) -> Optional[str]:
|
|||
|
|
"""
|
|||
|
|
获取结束原因
|
|||
|
|
|
|||
|
|
Args:
|
|||
|
|
chunk: chunk 字典
|
|||
|
|
|
|||
|
|
Returns:
|
|||
|
|
结束原因字符串
|
|||
|
|
"""
|
|||
|
|
choices = chunk.get("choices", [])
|
|||
|
|
if choices:
|
|||
|
|
return choices[0].get("finish_reason")
|
|||
|
|
return None
|
|||
|
|
|
|||
|
|
def extract_text_delta(self, chunk: Dict[str, Any]) -> Optional[str]:
|
|||
|
|
"""
|
|||
|
|
从 chunk 中提取文本增量
|
|||
|
|
|
|||
|
|
Args:
|
|||
|
|
chunk: chunk 字典
|
|||
|
|
|
|||
|
|
Returns:
|
|||
|
|
文本增量,如果没有返回 None
|
|||
|
|
"""
|
|||
|
|
choices = chunk.get("choices", [])
|
|||
|
|
if not choices:
|
|||
|
|
return None
|
|||
|
|
|
|||
|
|
delta = choices[0].get("delta", {})
|
|||
|
|
content = delta.get("content")
|
|||
|
|
|
|||
|
|
if isinstance(content, str):
|
|||
|
|
return content
|
|||
|
|
|
|||
|
|
return None
|
|||
|
|
|
|||
|
|
def extract_tool_calls_delta(self, chunk: Dict[str, Any]) -> Optional[List[Dict[str, Any]]]:
|
|||
|
|
"""
|
|||
|
|
从 chunk 中提取工具调用增量
|
|||
|
|
|
|||
|
|
Args:
|
|||
|
|
chunk: chunk 字典
|
|||
|
|
|
|||
|
|
Returns:
|
|||
|
|
工具调用列表,如果没有返回 None
|
|||
|
|
"""
|
|||
|
|
choices = chunk.get("choices", [])
|
|||
|
|
if not choices:
|
|||
|
|
return None
|
|||
|
|
|
|||
|
|
delta = choices[0].get("delta", {})
|
|||
|
|
return delta.get("tool_calls")
|
|||
|
|
|
|||
|
|
def extract_role(self, chunk: Dict[str, Any]) -> Optional[str]:
|
|||
|
|
"""
|
|||
|
|
从 chunk 中提取角色
|
|||
|
|
|
|||
|
|
通常只在第一个 chunk 中出现。
|
|||
|
|
|
|||
|
|
Args:
|
|||
|
|
chunk: chunk 字典
|
|||
|
|
|
|||
|
|
Returns:
|
|||
|
|
角色字符串
|
|||
|
|
"""
|
|||
|
|
choices = chunk.get("choices", [])
|
|||
|
|
if not choices:
|
|||
|
|
return None
|
|||
|
|
|
|||
|
|
delta = choices[0].get("delta", {})
|
|||
|
|
return delta.get("role")
|
|||
|
|
|
|||
|
|
|
|||
|
|
# Public names exported by this module.
__all__ = ["OpenAIStreamParser"]
|