Initial commit

fawney19
2025-12-10 20:52:44 +08:00
commit f784106826
485 changed files with 110993 additions and 0 deletions


@@ -0,0 +1,17 @@
"""
Claude Chat API handlers
"""
from src.api.handlers.claude.adapter import (
ClaudeChatAdapter,
ClaudeTokenCountAdapter,
build_claude_adapter,
)
from src.api.handlers.claude.handler import ClaudeChatHandler
__all__ = [
"ClaudeChatAdapter",
"ClaudeTokenCountAdapter",
"build_claude_adapter",
"ClaudeChatHandler",
]
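A minimal usage sketch (illustrative; the import path follows this package's own imports): build_claude_adapter selects the adapter variant from the x-app header and falls back to the standard chat adapter.

from src.api.handlers.claude import build_claude_adapter

adapter = build_claude_adapter(x_app_header=None)       # -> ClaudeChatAdapter
cli_adapter = build_claude_adapter(x_app_header="cli")  # -> ClaudeCliAdapter (Claude Code CLI traffic)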


@@ -0,0 +1,228 @@
"""
Claude Chat Adapter - Claude Chat API adapter built on ChatAdapterBase
Handles Claude Chat-format requests on the /v1/messages endpoint.
"""
from typing import Any, Dict, Optional, Type
from fastapi import HTTPException, Request
from fastapi.responses import JSONResponse
from src.api.base.adapter import ApiAdapter, ApiMode
from src.api.base.context import ApiRequestContext
from src.api.handlers.base.chat_adapter_base import ChatAdapterBase, register_adapter
from src.api.handlers.base.chat_handler_base import ChatHandlerBase
from src.core.logger import logger
from src.core.optimization_utils import TokenCounter
from src.models.claude import ClaudeMessagesRequest, ClaudeTokenCountRequest
class ClaudeCapabilityDetector:
"""Claude API 能力检测器"""
@staticmethod
def detect_from_headers(
headers: Dict[str, str],
request_body: Optional[Dict[str, Any]] = None,
) -> Dict[str, bool]:
"""
Detect capability requirements from Claude request headers
Detection rules:
- anthropic-beta: context-1m-xxx -> context_1m: True
Args:
headers: request headers dict
request_body: request body (unused by Claude; kept for interface consistency)
"""
requirements: Dict[str, bool] = {}
# Check the anthropic-beta header (case-insensitive)
beta_header = None
for key, value in headers.items():
if key.lower() == "anthropic-beta":
beta_header = value
break
if beta_header:
# Check for the context-1m marker
if "context-1m" in beta_header.lower():
requirements["context_1m"] = True
return requirements
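# Example (illustrative values): a request whose anthropic-beta header contains the
# context-1m marker is flagged for the 1M-token context capability:
#   ClaudeCapabilityDetector.detect_from_headers({"anthropic-beta": "context-1m-xxx"})
#   -> {"context_1m": True}
# Any other header set yields an empty requirements dict.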
@register_adapter
class ClaudeChatAdapter(ChatAdapterBase):
"""
Claude Chat API adapter
Handles Claude Chat-format requests (/v1/messages endpoint, with format validation).
"""
FORMAT_ID = "CLAUDE"
name = "claude.chat"
@property
def HANDLER_CLASS(self) -> Type[ChatHandlerBase]:
"""延迟导入 Handler 类避免循环依赖"""
from src.api.handlers.claude.handler import ClaudeChatHandler
return ClaudeChatHandler
def __init__(self, allowed_api_formats: Optional[list[str]] = None):
super().__init__(allowed_api_formats or ["CLAUDE"])
logger.info(f"[{self.name}] 初始化Chat模式适配器 | API格式: {self.allowed_api_formats}")
def extract_api_key(self, request: Request) -> Optional[str]:
"""从请求中提取 API 密钥 (x-api-key)"""
return request.headers.get("x-api-key")
def detect_capability_requirements(
self,
headers: Dict[str, str],
request_body: Optional[Dict[str, Any]] = None,
) -> Dict[str, bool]:
"""检测 Claude 请求中隐含的能力需求"""
return ClaudeCapabilityDetector.detect_from_headers(headers)
# =========================================================================
# Claude-specific billing logic
# =========================================================================
def compute_total_input_context(
self,
input_tokens: int,
cache_read_input_tokens: int,
cache_creation_input_tokens: int = 0,
) -> int:
"""
Compute Claude's total input context (used for tiered-pricing decisions)
Claude's total input = input_tokens + cache_creation_input_tokens + cache_read_input_tokens
"""
return input_tokens + cache_creation_input_tokens + cache_read_input_tokens
def _validate_request_body(self, original_request_body: dict, path_params: Optional[dict] = None):
"""Validate the request body"""
try:
if not isinstance(original_request_body, dict):
raise ValueError("Request body must be a JSON object")
required_fields = ["model", "messages", "max_tokens"]
missing_fields = [f for f in required_fields if f not in original_request_body]
if missing_fields:
raise ValueError(f"Missing required fields: {', '.join(missing_fields)}")
request = ClaudeMessagesRequest.model_validate(
original_request_body,
strict=False,
)
except ValueError as e:
logger.error(f"请求体基本验证失败: {str(e)}")
raise HTTPException(status_code=400, detail=str(e))
except Exception as e:
logger.warning(f"Pydantic验证警告(将继续处理): {str(e)}")
request = ClaudeMessagesRequest.model_construct(
model=original_request_body.get("model"),
max_tokens=original_request_body.get("max_tokens"),
messages=original_request_body.get("messages", []),
stream=original_request_body.get("stream", False),
)
return request
def _build_audit_metadata(self, _payload: Dict[str, Any], request_obj) -> Dict[str, Any]:
"""构建 Claude Chat 特定的审计元数据"""
role_counts: dict[str, int] = {}
for message in request_obj.messages:
role_counts[message.role] = role_counts.get(message.role, 0) + 1
return {
"action": "claude_messages",
"model": request_obj.model,
"stream": bool(request_obj.stream),
"max_tokens": request_obj.max_tokens,
"temperature": getattr(request_obj, "temperature", None),
"top_p": getattr(request_obj, "top_p", None),
"top_k": getattr(request_obj, "top_k", None),
"messages_count": len(request_obj.messages),
"message_roles": role_counts,
"stop_sequences": len(request_obj.stop_sequences or []),
"tools_count": len(request_obj.tools or []),
"system_present": bool(request_obj.system),
"metadata_present": bool(request_obj.metadata),
"thinking_enabled": bool(request_obj.thinking),
}
def build_claude_adapter(x_app_header: Optional[str]):
"""根据 x-app 头部构造 Chat 或 Claude Code 适配器。"""
if x_app_header and x_app_header.lower() == "cli":
from src.api.handlers.claude_cli.adapter import ClaudeCliAdapter
return ClaudeCliAdapter()
return ClaudeChatAdapter()
class ClaudeTokenCountAdapter(ApiAdapter):
"""计算 Claude 请求 Token 数的轻量适配器。"""
name = "claude.token_count"
mode = ApiMode.STANDARD
def extract_api_key(self, request: Request) -> Optional[str]:
"""从请求中提取 API 密钥 (x-api-key 或 Authorization: Bearer)"""
# 优先检查 x-api-key
api_key = request.headers.get("x-api-key")
if api_key:
return api_key
# Fall back to Authorization: Bearer
authorization = request.headers.get("authorization")
if authorization and authorization.startswith("Bearer "):
return authorization[len("Bearer "):]
return None
async def handle(self, context: ApiRequestContext):
payload = context.ensure_json_body()
try:
request = ClaudeTokenCountRequest.model_validate(payload, strict=False)
except Exception as e:
logger.error(f"Token count payload invalid: {e}")
raise HTTPException(status_code=400, detail="Invalid token count payload") from e
token_counter = TokenCounter()
total_tokens = 0
if request.system:
if isinstance(request.system, str):
total_tokens += token_counter.count_tokens(request.system, request.model)
elif isinstance(request.system, list):
for block in request.system:
if hasattr(block, "text"):
total_tokens += token_counter.count_tokens(block.text, request.model)
messages_dict = [
msg.model_dump() if hasattr(msg, "model_dump") else msg for msg in request.messages
]
total_tokens += token_counter.count_messages_tokens(messages_dict, request.model)
context.add_audit_metadata(
action="claude_token_count",
model=request.model,
messages_count=len(request.messages),
system_present=bool(request.system),
tools_count=len(request.tools or []),
thinking_enabled=bool(request.thinking),
input_tokens=total_tokens,
)
return JSONResponse({"input_tokens": total_tokens})
__all__ = [
"ClaudeChatAdapter",
"ClaudeTokenCountAdapter",
"build_claude_adapter",
]
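A rough sketch of the token-count flow (payload values are illustrative): ClaudeTokenCountAdapter.handle() validates the JSON body as ClaudeTokenCountRequest, sums system and message tokens with TokenCounter, records audit metadata, and returns the total.

payload = {
    "model": "claude-sonnet-4",  # illustrative model name
    "system": "You are a helpful assistant.",
    "messages": [{"role": "user", "content": "Hello"}],
}
# -> JSONResponse({"input_tokens": <system tokens + message tokens>})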


@@ -0,0 +1,490 @@
"""
OpenAI -> Claude format converter
Converts the OpenAI Chat Completions API format into the Claude Messages API format.
"""
from __future__ import annotations
import json
import time
import uuid
from typing import Any, Dict, List, Optional, Union
from pydantic import BaseModel
class OpenAIToClaudeConverter:
"""
OpenAI -> Claude format converter
Supports:
- Request conversion: OpenAI Chat Request -> Claude Request
- Response conversion: OpenAI Chat Response -> Claude Response
- Streaming conversion: OpenAI SSE -> Claude SSE
"""
# Content type constants
CONTENT_TYPE_TEXT = "text"
CONTENT_TYPE_IMAGE = "image"
CONTENT_TYPE_TOOL_USE = "tool_use"
CONTENT_TYPE_TOOL_RESULT = "tool_result"
# Finish reason mapping: OpenAI -> Claude
FINISH_REASON_MAP = {
"stop": "end_turn",
"length": "max_tokens",
"tool_calls": "tool_use",
"function_call": "tool_use",
"content_filter": "end_turn",
}
def __init__(self, model_mapping: Optional[Dict[str, str]] = None):
"""
Args:
model_mapping: mapping from OpenAI model names to Claude model names
"""
self._model_mapping = model_mapping or {}
# ==================== Request conversion ====================
def convert_request(self, request: Union[Dict[str, Any], BaseModel]) -> Dict[str, Any]:
"""
Convert an OpenAI request to Claude format
Args:
request: OpenAI request (dict or Pydantic model)
Returns:
Claude-format request dict
"""
if hasattr(request, "model_dump"):
data = request.model_dump(exclude_none=True)
else:
data = dict(request)
# Model mapping
model = data.get("model", "")
claude_model = self._model_mapping.get(model, model)
# Process messages
system_content: Optional[str] = None
claude_messages: List[Dict[str, Any]] = []
for message in data.get("messages", []):
role = message.get("role")
# Extract system messages
if role == "system":
system_content = self._collapse_content(message.get("content"))
continue
# Convert the remaining messages
converted = self._convert_message(message)
if converted:
claude_messages.append(converted)
# Build the Claude request
result: Dict[str, Any] = {
"model": claude_model,
"messages": claude_messages,
"max_tokens": data.get("max_tokens") or 4096,
}
# Optional parameters
if data.get("temperature") is not None:
result["temperature"] = data["temperature"]
if data.get("top_p") is not None:
result["top_p"] = data["top_p"]
if data.get("stream"):
result["stream"] = data["stream"]
if data.get("stop"):
result["stop_sequences"] = self._convert_stop(data["stop"])
if system_content:
result["system"] = system_content
# Tool conversion
tools = self._convert_tools(data.get("tools"))
if tools:
result["tools"] = tools
tool_choice = self._convert_tool_choice(data.get("tool_choice"))
if tool_choice:
result["tool_choice"] = tool_choice
return result
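# Illustrative round trip (example values only):
#   {"model": "gpt-4o", "max_tokens": 256,
#    "messages": [{"role": "system", "content": "Be brief."},
#                 {"role": "user", "content": "Hi"}]}
# becomes
#   {"model": "gpt-4o", "messages": [{"role": "user", "content": "Hi"}],
#    "max_tokens": 256, "system": "Be brief."}
# (the model name passes through unchanged unless model_mapping rewrites it).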
def _convert_message(self, message: Dict[str, Any]) -> Optional[Dict[str, Any]]:
"""转换单条消息"""
role = message.get("role")
if role == "user":
return self._convert_user_message(message)
if role == "assistant":
return self._convert_assistant_message(message)
if role == "tool":
return self._convert_tool_message(message)
return None
def _convert_user_message(self, message: Dict[str, Any]) -> Dict[str, Any]:
"""转换用户消息"""
content = message.get("content")
if isinstance(content, str) or content is None:
return {"role": "user", "content": content or ""}
# Convert the content array
claude_content: List[Dict[str, Any]] = []
for item in content:
item_type = item.get("type")
if item_type == "text":
claude_content.append(
{"type": self.CONTENT_TYPE_TEXT, "text": item.get("text", "")}
)
elif item_type == "image_url":
image_url = (item.get("image_url") or {}).get("url", "")
claude_content.append(self._convert_image_url(image_url))
return {"role": "user", "content": claude_content}
def _convert_assistant_message(self, message: Dict[str, Any]) -> Dict[str, Any]:
"""转换助手消息"""
content_blocks: List[Dict[str, Any]] = []
# Handle text content
content = message.get("content")
if isinstance(content, str):
content_blocks.append({"type": self.CONTENT_TYPE_TEXT, "text": content})
elif isinstance(content, list):
for part in content:
if part.get("type") == "text":
content_blocks.append(
{"type": self.CONTENT_TYPE_TEXT, "text": part.get("text", "")}
)
# Handle tool calls
for tool_call in message.get("tool_calls") or []:
if tool_call.get("type") == "function":
function = tool_call.get("function", {})
arguments = function.get("arguments", "{}")
try:
input_data = json.loads(arguments)
except json.JSONDecodeError:
input_data = {"raw": arguments}
content_blocks.append(
{
"type": self.CONTENT_TYPE_TOOL_USE,
"id": tool_call.get("id", ""),
"name": function.get("name", ""),
"input": input_data,
}
)
# Collapse a single text block to a plain string
if not content_blocks:
return {"role": "assistant", "content": ""}
if len(content_blocks) == 1 and content_blocks[0]["type"] == self.CONTENT_TYPE_TEXT:
return {"role": "assistant", "content": content_blocks[0]["text"]}
return {"role": "assistant", "content": content_blocks}
def _convert_tool_message(self, message: Dict[str, Any]) -> Dict[str, Any]:
"""转换工具结果消息"""
tool_content = message.get("content", "")
# Try to parse JSON content
parsed_content = tool_content
if isinstance(tool_content, str):
try:
parsed_content = json.loads(tool_content)
except json.JSONDecodeError:
pass
tool_block = {
"type": self.CONTENT_TYPE_TOOL_RESULT,
"tool_use_id": message.get("tool_call_id", ""),
"content": parsed_content,
}
return {"role": "user", "content": [tool_block]}
def _convert_tools(
self, tools: Optional[List[Dict[str, Any]]]
) -> Optional[List[Dict[str, Any]]]:
"""转换工具定义"""
if not tools:
return None
result: List[Dict[str, Any]] = []
for tool in tools:
if tool.get("type") != "function":
continue
function = tool.get("function", {})
result.append(
{
"name": function.get("name", ""),
"description": function.get("description"),
"input_schema": function.get("parameters") or {},
}
)
return result if result else None
def _convert_tool_choice(
self, tool_choice: Optional[Union[str, Dict[str, Any]]]
) -> Optional[Dict[str, Any]]:
"""转换工具选择"""
if tool_choice is None:
return None
if tool_choice == "none":
return {"type": "none"}
if tool_choice == "auto":
return {"type": "auto"}
if tool_choice == "required":
return {"type": "any"}
if isinstance(tool_choice, dict) and tool_choice.get("type") == "function":
function = tool_choice.get("function", {})
return {"type": "tool_use", "name": function.get("name", "")}
return {"type": "auto"}
def _convert_image_url(self, image_url: str) -> Dict[str, Any]:
"""转换图片 URL"""
if image_url.startswith("data:"):
header, _, data = image_url.partition(",")
media_type = "image/jpeg"
if ";" in header:
media_type = header.split(";")[0].split(":")[-1]
return {
"type": self.CONTENT_TYPE_IMAGE,
"source": {
"type": "base64",
"media_type": media_type,
"data": data,
},
}
return {"type": self.CONTENT_TYPE_TEXT, "text": f"[Image: {image_url}]"}
def _convert_stop(self, stop: Optional[Union[str, List[str]]]) -> Optional[List[str]]:
"""转换停止序列"""
if stop is None:
return None
if isinstance(stop, str):
return [stop]
return stop
# ==================== Response conversion ====================
def convert_response(self, response: Dict[str, Any]) -> Dict[str, Any]:
"""
Convert an OpenAI response to Claude format
Args:
response: OpenAI response dict
Returns:
Claude-format response dict
"""
choices = response.get("choices", [])
if not choices:
return self._empty_claude_response(response)
choice = choices[0]
message = choice.get("message", {})
# Build the content array
content: List[Dict[str, Any]] = []
# Handle text
text_content = message.get("content")
if text_content:
content.append(
{
"type": self.CONTENT_TYPE_TEXT,
"text": text_content,
}
)
# Handle tool calls
for tool_call in message.get("tool_calls") or []:
if tool_call.get("type") == "function":
function = tool_call.get("function", {})
arguments = function.get("arguments", "{}")
try:
input_data = json.loads(arguments)
except json.JSONDecodeError:
input_data = {"raw": arguments}
content.append(
{
"type": self.CONTENT_TYPE_TOOL_USE,
"id": tool_call.get("id", ""),
"name": function.get("name", ""),
"input": input_data,
}
)
# Convert finish_reason
finish_reason = choice.get("finish_reason")
stop_reason = self.FINISH_REASON_MAP.get(finish_reason, "end_turn")
# Convert usage
usage = response.get("usage", {})
claude_usage = {
"input_tokens": usage.get("prompt_tokens", 0),
"output_tokens": usage.get("completion_tokens", 0),
}
return {
"id": f"msg_{response.get('id', uuid.uuid4().hex[:8])}",
"type": "message",
"role": "assistant",
"model": response.get("model", ""),
"content": content,
"stop_reason": stop_reason,
"stop_sequence": None,
"usage": claude_usage,
}
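# Illustrative mapping (example values only): an OpenAI response
#   {"id": "chatcmpl-abc", "model": "gpt-4o",
#    "choices": [{"message": {"role": "assistant", "content": "Hi"}, "finish_reason": "stop"}],
#    "usage": {"prompt_tokens": 9, "completion_tokens": 2}}
# converts to
#   {"id": "msg_chatcmpl-abc", "type": "message", "role": "assistant", "model": "gpt-4o",
#    "content": [{"type": "text", "text": "Hi"}], "stop_reason": "end_turn",
#    "stop_sequence": None, "usage": {"input_tokens": 9, "output_tokens": 2}}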
def _empty_claude_response(self, response: Dict[str, Any]) -> Dict[str, Any]:
"""构建空的 Claude 响应"""
return {
"id": f"msg_{response.get('id', uuid.uuid4().hex[:8])}",
"type": "message",
"role": "assistant",
"model": response.get("model", ""),
"content": [],
"stop_reason": "end_turn",
"stop_sequence": None,
"usage": {"input_tokens": 0, "output_tokens": 0},
}
# ==================== Streaming conversion ====================
def convert_stream_chunk(
self,
chunk: Dict[str, Any],
model: str = "",
message_id: Optional[str] = None,
message_started: bool = False,
) -> List[Dict[str, Any]]:
"""
Convert an OpenAI SSE chunk into Claude SSE events
Args:
chunk: OpenAI SSE chunk
model: model name
message_id: message ID
message_started: whether message_start has already been emitted
Returns:
List of Claude SSE events
"""
events: List[Dict[str, Any]] = []
choices = chunk.get("choices", [])
if not choices:
return events
choice = choices[0]
delta = choice.get("delta", {})
finish_reason = choice.get("finish_reason")
# Handle the role (first chunk)
role = delta.get("role")
if role and not message_started:
msg_id = message_id or f"msg_{uuid.uuid4().hex[:8]}"
events.append(
{
"type": "message_start",
"message": {
"id": msg_id,
"type": "message",
"role": role,
"model": model,
"content": [],
"stop_reason": None,
"stop_sequence": None,
},
}
)
# Handle text content
content_delta = delta.get("content")
if isinstance(content_delta, str):
events.append(
{
"type": "content_block_delta",
"index": 0,
"delta": {"type": "text_delta", "text": content_delta},
}
)
# Handle tool calls
tool_calls = delta.get("tool_calls", [])
for tool_call in tool_calls:
index = tool_call.get("index", 0)
# Tool call start
if "id" in tool_call:
function = tool_call.get("function", {})
events.append(
{
"type": "content_block_start",
"index": index,
"content_block": {
"type": self.CONTENT_TYPE_TOOL_USE,
"id": tool_call["id"],
"name": function.get("name", ""),
},
}
)
# Tool call argument delta
function = tool_call.get("function", {})
if "arguments" in function:
events.append(
{
"type": "content_block_delta",
"index": index,
"delta": {
"type": "input_json_delta",
"partial_json": function.get("arguments", ""),
},
}
)
# Handle completion
if finish_reason:
stop_reason = self.FINISH_REASON_MAP.get(finish_reason, "end_turn")
events.append(
{
"type": "message_delta",
"delta": {"stop_reason": stop_reason},
}
)
return events
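# Illustrative (example values only): a text chunk
#   {"choices": [{"delta": {"content": "Hel"}, "finish_reason": None}]}
# yields
#   [{"type": "content_block_delta", "index": 0, "delta": {"type": "text_delta", "text": "Hel"}}]
# and a final chunk with finish_reason "stop" yields
#   [{"type": "message_delta", "delta": {"stop_reason": "end_turn"}}]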
# ==================== Utility methods ====================
def _collapse_content(
self, content: Optional[Union[str, List[Dict[str, Any]]]]
) -> Optional[str]:
"""折叠内容为字符串"""
if isinstance(content, str):
return content
if not content:
return None
text_parts = [part.get("text", "") for part in content if part.get("type") == "text"]
return "\n\n".join(filter(None, text_parts)) or None
__all__ = ["OpenAIToClaudeConverter"]


@@ -0,0 +1,150 @@
"""
Claude Chat Handler - a slim implementation built on the generic chat handler base class
Inherits ChatHandlerBase and only overrides format-specific methods.
Cuts the code from roughly 1,470 lines down to about 120.
"""
from typing import Any, Dict, Optional
from src.api.handlers.base.chat_handler_base import ChatHandlerBase
class ClaudeChatHandler(ChatHandlerBase):
"""
Claude Chat Handler - handles requests in the Claude Chat/CLI API format
Format characteristics:
- Uses input_tokens/output_tokens
- Supports cache_creation_input_tokens/cache_read_input_tokens
- Request format: ClaudeMessagesRequest
"""
FORMAT_ID = "CLAUDE"
def extract_model_from_request(
self,
request_body: Dict[str, Any],
path_params: Optional[Dict[str, Any]] = None, # noqa: ARG002
) -> str:
"""
Extract the model name from the request - Claude-format implementation
The Claude API carries model as a top-level field of the request body.
Args:
request_body: request body
path_params: URL path parameters (unused by Claude)
Returns:
Model name
"""
model = request_body.get("model")
return str(model) if model else "unknown"
def apply_mapped_model(
self,
request_body: Dict[str, Any],
mapped_model: str,
) -> Dict[str, Any]:
"""
Apply the mapped model name to the request body
The Claude API carries model as a top-level field of the request body.
Args:
request_body: original request body
mapped_model: mapped model name
Returns:
Request body with its model field updated
"""
result = dict(request_body)
result["model"] = mapped_model
return result
async def _convert_request(self, request):
"""
Convert the request to Claude format
Args:
request: original request object
Returns:
A ClaudeMessagesRequest object
"""
from src.api.handlers.claude.converter import OpenAIToClaudeConverter
from src.models.claude import ClaudeMessagesRequest
from src.models.openai import OpenAIRequest
# Already Claude format, return as-is
if isinstance(request, ClaudeMessagesRequest):
return request
# OpenAI format: convert it to Claude format
if isinstance(request, OpenAIRequest):
converter = OpenAIToClaudeConverter()
claude_dict = converter.convert_request(request.model_dump())
return ClaudeMessagesRequest(**claude_dict)
# Dict: infer the format from its content
if isinstance(request, dict):
if "messages" in request and len(request["messages"]) > 0:
first_msg = request["messages"][0]
if "role" in first_msg and "content" in first_msg:
# Likely OpenAI format
converter = OpenAIToClaudeConverter()
claude_dict = converter.convert_request(request)
return ClaudeMessagesRequest(**claude_dict)
# Otherwise assume it is already Claude format
return ClaudeMessagesRequest(**request)
return request
def _extract_usage(self, response: Dict) -> Dict[str, int]:
"""
Extract token usage from a Claude response
Claude responses use:
- input_tokens / output_tokens
- cache_creation_input_tokens / cache_read_input_tokens
"""
usage = response.get("usage", {})
input_tokens = usage.get("input_tokens", 0)
output_tokens = usage.get("output_tokens", 0)
cache_creation_input_tokens = usage.get("cache_creation_input_tokens", 0)
cache_read_input_tokens = usage.get("cache_read_input_tokens", 0)
# Handle the newer nested cache_creation format
if "cache_creation" in usage:
cache_creation_data = usage.get("cache_creation", {})
if not cache_creation_input_tokens:
cache_creation_input_tokens = cache_creation_data.get(
"ephemeral_5m_input_tokens", 0
) + cache_creation_data.get("ephemeral_1h_input_tokens", 0)
return {
"input_tokens": input_tokens,
"output_tokens": output_tokens,
"cache_creation_input_tokens": cache_creation_input_tokens,
"cache_read_input_tokens": cache_read_input_tokens,
}
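# Illustrative (example values only): a response with the nested cache_creation shape
#   {"usage": {"input_tokens": 12, "output_tokens": 40, "cache_read_input_tokens": 2048,
#              "cache_creation": {"ephemeral_5m_input_tokens": 1024, "ephemeral_1h_input_tokens": 0}}}
# extracts to
#   {"input_tokens": 12, "output_tokens": 40,
#    "cache_creation_input_tokens": 1024, "cache_read_input_tokens": 2048}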
def _normalize_response(self, response: Dict) -> Dict:
"""
Normalize a Claude response
Args:
response: raw response
Returns:
Normalized response
"""
if self.response_normalizer and self.response_normalizer.should_normalize(response):
return self.response_normalizer.normalize_claude_response(
response_data=response,
request_id=self.request_id,
)
return response
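A minimal sketch of the model-mapping hooks above (the request body is illustrative; handler construction is omitted since it comes from ChatHandlerBase):

body = {"model": "claude-3-5-sonnet", "max_tokens": 1024, "messages": []}
# extract_model_from_request(body) -> "claude-3-5-sonnet"
# apply_mapped_model(body, "claude-sonnet-4") returns a copy of body with the
# model field rewritten, leaving the original request dict untouched.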


@@ -0,0 +1,241 @@
"""
Claude SSE stream parser
Parses Server-Sent Events streams from the Claude Messages API.
"""
from __future__ import annotations
import json
from typing import Any, Dict, List, Optional
class ClaudeStreamParser:
"""
Claude SSE stream parser
Parses the SSE event stream from the Claude Messages API.
Event types:
- message_start: message begins; carries the initial message object
- content_block_start: a content block begins
- content_block_delta: content block delta (text, tool input, etc.)
- content_block_stop: a content block ends
- message_delta: message delta; carries stop_reason and the final usage
- message_stop: message ends
- ping: heartbeat event
- error: error event
"""
# Claude SSE event types
EVENT_MESSAGE_START = "message_start"
EVENT_MESSAGE_STOP = "message_stop"
EVENT_MESSAGE_DELTA = "message_delta"
EVENT_CONTENT_BLOCK_START = "content_block_start"
EVENT_CONTENT_BLOCK_STOP = "content_block_stop"
EVENT_CONTENT_BLOCK_DELTA = "content_block_delta"
EVENT_PING = "ping"
EVENT_ERROR = "error"
# Delta types
DELTA_TEXT = "text_delta"
DELTA_INPUT_JSON = "input_json_delta"
def parse_chunk(self, chunk: bytes | str) -> List[Dict[str, Any]]:
"""
Parse an SSE data chunk
Args:
chunk: raw SSE data (bytes or str)
Returns:
List of parsed events
"""
if isinstance(chunk, bytes):
text = chunk.decode("utf-8")
else:
text = chunk
events: List[Dict[str, Any]] = []
lines = text.strip().split("\n")
current_event_type: Optional[str] = None
for line in lines:
line = line.strip()
if not line:
continue
# Parse an event-type line
if line.startswith("event: "):
current_event_type = line[7:]
continue
# Parse a data line
if line.startswith("data: "):
data_str = line[6:]
# Handle the [DONE] marker
if data_str == "[DONE]":
events.append({"type": "__done__", "raw": "[DONE]"})
continue
try:
data = json.loads(data_str)
# If the payload has no type, use the type from the event line
if "type" not in data and current_event_type:
data["type"] = current_event_type
events.append(data)
except json.JSONDecodeError:
# Unparseable data, skip it
pass
current_event_type = None
return events
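# Illustrative: a raw SSE block such as
#   event: content_block_delta
#   data: {"type": "content_block_delta", "index": 0, "delta": {"type": "text_delta", "text": "Hi"}}
# parses into one event dict, and a "data: [DONE]" line becomes {"type": "__done__", "raw": "[DONE]"}.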
def parse_line(self, line: str) -> Optional[Dict[str, Any]]:
"""
Parse a single SSE data line
Args:
line: SSE data line (with the "data: " prefix already stripped)
Returns:
Parsed event dict, or None if the line cannot be parsed
"""
if not line or line == "[DONE]":
return None
try:
return json.loads(line)
except json.JSONDecodeError:
return None
def is_done_event(self, event: Dict[str, Any]) -> bool:
"""
Check whether an event terminates the stream
Args:
event: event dict
Returns:
True if this is a terminating event
"""
event_type = event.get("type")
return event_type in (self.EVENT_MESSAGE_STOP, "__done__")
def is_error_event(self, event: Dict[str, Any]) -> bool:
"""
Check whether an event is an error event
Args:
event: event dict
Returns:
True if this is an error event
"""
return event.get("type") == self.EVENT_ERROR
def get_event_type(self, event: Dict[str, Any]) -> Optional[str]:
"""
Get the event type
Args:
event: event dict
Returns:
Event type string
"""
return event.get("type")
def extract_text_delta(self, event: Dict[str, Any]) -> Optional[str]:
"""
Extract the text delta from a content_block_delta event
Args:
event: event dict
Returns:
Text delta, or None if this is not a text delta
"""
if event.get("type") != self.EVENT_CONTENT_BLOCK_DELTA:
return None
delta = event.get("delta", {})
if delta.get("type") == self.DELTA_TEXT:
return delta.get("text")
return None
def extract_usage(self, event: Dict[str, Any]) -> Optional[Dict[str, int]]:
"""
Extract token usage from an event
Args:
event: event dict
Returns:
Usage dict, or None if the event carries no usage information
"""
event_type = event.get("type")
# The message_start event carries the initial usage
if event_type == self.EVENT_MESSAGE_START:
message = event.get("message", {})
usage = message.get("usage", {})
if usage:
return {
"input_tokens": usage.get("input_tokens", 0),
"output_tokens": usage.get("output_tokens", 0),
"cache_creation_tokens": usage.get("cache_creation_input_tokens", 0),
"cache_read_tokens": usage.get("cache_read_input_tokens", 0),
}
# The message_delta event carries the final usage
if event_type == self.EVENT_MESSAGE_DELTA:
usage = event.get("usage", {})
if usage:
return {
"input_tokens": usage.get("input_tokens", 0),
"output_tokens": usage.get("output_tokens", 0),
"cache_creation_tokens": usage.get("cache_creation_input_tokens", 0),
"cache_read_tokens": usage.get("cache_read_input_tokens", 0),
}
return None
def extract_message_id(self, event: Dict[str, Any]) -> Optional[str]:
"""
Extract the message ID from a message_start event
Args:
event: event dict
Returns:
Message ID, or None if the event is not message_start
"""
if event.get("type") != self.EVENT_MESSAGE_START:
return None
message = event.get("message", {})
return message.get("id")
def extract_stop_reason(self, event: Dict[str, Any]) -> Optional[str]:
"""
Extract the stop reason from a message_delta event
Args:
event: event dict
Returns:
Stop reason, or None if absent
"""
if event.get("type") != self.EVENT_MESSAGE_DELTA:
return None
delta = event.get("delta", {})
return delta.get("stop_reason")
__all__ = ["ClaudeStreamParser"]