Initial commit

2026-01-08 10:42:29 +08:00 · 2025-12-10 20:52:44 +08:00
commit f784106826
485 changed files with 110993 additions and 0 deletions
--- a/src/api/handlers/openai/stream_parser.py
+++ b/src/api/handlers/openai/stream_parser.py
@@ -0,0 +1,181 @@
+"""
+OpenAI SSE 流解析器
+
+解析 OpenAI Chat Completions API 的 Server-Sent Events 流。
+"""
+
+from __future__ import annotations
+
+import json
+from typing import Any, Dict, List, Optional
+
+
+class OpenAIStreamParser:
+    """
+    OpenAI SSE 流解析器
+
+    解析 OpenAI Chat Completions API 的 SSE 事件流。
+
+    OpenAI 流格式：
+    - 每个 chunk 是一个 JSON 对象，包含 choices 数组
+    - choices[0].delta 包含增量内容
+    - choices[0].finish_reason 表示结束原因
+    - 流结束时发送 data: [DONE]
+    """
+
+    def parse_chunk(self, chunk: bytes | str) -> List[Dict[str, Any]]:
+        """
+        解析 SSE 数据块
+
+        Args:
+            chunk: 原始 SSE 数据（bytes 或 str）
+
+        Returns:
+            解析后的 chunk 列表
+        """
+        if isinstance(chunk, bytes):
+            text = chunk.decode("utf-8")
+        else:
+            text = chunk
+
+        chunks: List[Dict[str, Any]] = []
+        lines = text.strip().split("\n")
+
+        for line in lines:
+            line = line.strip()
+            if not line:
+                continue
+
+            # 解析数据行
+            if line.startswith("data: "):
+                data_str = line[6:]
+
+                # 处理 [DONE] 标记
+                if data_str == "[DONE]":
+                    chunks.append({"__done__": True})
+                    continue
+
+                try:
+                    data = json.loads(data_str)
+                    chunks.append(data)
+                except json.JSONDecodeError:
+                    # 无法解析的数据，跳过
+                    pass
+
+        return chunks
+
+    def parse_line(self, line: str) -> Optional[Dict[str, Any]]:
+        """
+        解析单行 SSE 数据
+
+        Args:
+            line: SSE 数据行（已去除 "data: " 前缀）
+
+        Returns:
+            解析后的 chunk 字典，如果无法解析返回 None
+        """
+        if not line or line == "[DONE]":
+            return None
+
+        try:
+            return json.loads(line)
+        except json.JSONDecodeError:
+            return None
+
+    def is_done_chunk(self, chunk: Dict[str, Any]) -> bool:
+        """
+        判断是否为结束 chunk
+
+        Args:
+            chunk: chunk 字典
+
+        Returns:
+            True 如果是结束 chunk
+        """
+        # 内部标记
+        if chunk.get("__done__"):
+            return True
+
+        # 检查 finish_reason
+        choices = chunk.get("choices", [])
+        if choices:
+            finish_reason = choices[0].get("finish_reason")
+            return finish_reason is not None
+
+        return False
+
+    def get_finish_reason(self, chunk: Dict[str, Any]) -> Optional[str]:
+        """
+        获取结束原因
+
+        Args:
+            chunk: chunk 字典
+
+        Returns:
+            结束原因字符串
+        """
+        choices = chunk.get("choices", [])
+        if choices:
+            return choices[0].get("finish_reason")
+        return None
+
+    def extract_text_delta(self, chunk: Dict[str, Any]) -> Optional[str]:
+        """
+        从 chunk 中提取文本增量
+
+        Args:
+            chunk: chunk 字典
+
+        Returns:
+            文本增量，如果没有返回 None
+        """
+        choices = chunk.get("choices", [])
+        if not choices:
+            return None
+
+        delta = choices[0].get("delta", {})
+        content = delta.get("content")
+
+        if isinstance(content, str):
+            return content
+
+        return None
+
+    def extract_tool_calls_delta(self, chunk: Dict[str, Any]) -> Optional[List[Dict[str, Any]]]:
+        """
+        从 chunk 中提取工具调用增量
+
+        Args:
+            chunk: chunk 字典
+
+        Returns:
+            工具调用列表，如果没有返回 None
+        """
+        choices = chunk.get("choices", [])
+        if not choices:
+            return None
+
+        delta = choices[0].get("delta", {})
+        return delta.get("tool_calls")
+
+    def extract_role(self, chunk: Dict[str, Any]) -> Optional[str]:
+        """
+        从 chunk 中提取角色
+
+        通常只在第一个 chunk 中出现。
+
+        Args:
+            chunk: chunk 字典
+
+        Returns:
+            角色字符串
+        """
+        choices = chunk.get("choices", [])
+        if not choices:
+            return None
+
+        delta = choices[0].get("delta", {})
+        return delta.get("role")
+
+
+# Names exported by ``from <module> import *``.
+__all__ = ["OpenAIStreamParser"]