""" OpenAI SSE 流解析器 解析 OpenAI Chat Completions API 的 Server-Sent Events 流。 """ from __future__ import annotations import json from typing import Any, Dict, List, Optional class OpenAIStreamParser: """ OpenAI SSE 流解析器 解析 OpenAI Chat Completions API 的 SSE 事件流。 OpenAI 流格式: - 每个 chunk 是一个 JSON 对象,包含 choices 数组 - choices[0].delta 包含增量内容 - choices[0].finish_reason 表示结束原因 - 流结束时发送 data: [DONE] """ def parse_chunk(self, chunk: bytes | str) -> List[Dict[str, Any]]: """ 解析 SSE 数据块 Args: chunk: 原始 SSE 数据(bytes 或 str) Returns: 解析后的 chunk 列表 """ if isinstance(chunk, bytes): text = chunk.decode("utf-8") else: text = chunk chunks: List[Dict[str, Any]] = [] lines = text.strip().split("\n") for line in lines: line = line.strip() if not line: continue # 解析数据行 if line.startswith("data: "): data_str = line[6:] # 处理 [DONE] 标记 if data_str == "[DONE]": chunks.append({"__done__": True}) continue try: data = json.loads(data_str) chunks.append(data) except json.JSONDecodeError: # 无法解析的数据,跳过 pass return chunks def parse_line(self, line: str) -> Optional[Dict[str, Any]]: """ 解析单行 SSE 数据 Args: line: SSE 数据行(已去除 "data: " 前缀) Returns: 解析后的 chunk 字典,如果无法解析返回 None """ if not line or line == "[DONE]": return None try: return json.loads(line) except json.JSONDecodeError: return None def is_done_chunk(self, chunk: Dict[str, Any]) -> bool: """ 判断是否为结束 chunk Args: chunk: chunk 字典 Returns: True 如果是结束 chunk """ # 内部标记 if chunk.get("__done__"): return True # 检查 finish_reason choices = chunk.get("choices", []) if choices: finish_reason = choices[0].get("finish_reason") return finish_reason is not None return False def get_finish_reason(self, chunk: Dict[str, Any]) -> Optional[str]: """ 获取结束原因 Args: chunk: chunk 字典 Returns: 结束原因字符串 """ choices = chunk.get("choices", []) if choices: return choices[0].get("finish_reason") return None def extract_text_delta(self, chunk: Dict[str, Any]) -> Optional[str]: """ 从 chunk 中提取文本增量 Args: chunk: chunk 字典 Returns: 文本增量,如果没有返回 None """ choices = chunk.get("choices", []) if not choices: return None delta = choices[0].get("delta", {}) content = delta.get("content") if isinstance(content, str): return content return None def extract_tool_calls_delta(self, chunk: Dict[str, Any]) -> Optional[List[Dict[str, Any]]]: """ 从 chunk 中提取工具调用增量 Args: chunk: chunk 字典 Returns: 工具调用列表,如果没有返回 None """ choices = chunk.get("choices", []) if not choices: return None delta = choices[0].get("delta", {}) return delta.get("tool_calls") def extract_role(self, chunk: Dict[str, Any]) -> Optional[str]: """ 从 chunk 中提取角色 通常只在第一个 chunk 中出现。 Args: chunk: chunk 字典 Returns: 角色字符串 """ choices = chunk.get("choices", []) if not choices: return None delta = choices[0].get("delta", {}) return delta.get("role") __all__ = ["OpenAIStreamParser"]