refactor(handler): optimize stream processing and telemetry pipeline

- Enhance stream context for better token and latency tracking
- Refactor stream processor for improved performance metrics
- Improve telemetry integration with first_byte_time_ms support
- Add comprehensive stream context unit tests

This commit is contained in:
fawney19
2025-12-16 02:39:03 +08:00
parent 9b496abb73
commit ad1c8c394c
8 changed files with 428 additions and 108 deletions

View File

@@ -2,7 +2,7 @@
Handler 基础工具函数
"""
from typing import Any, Dict
from typing import Any, Dict, Optional
def extract_cache_creation_tokens(usage: Dict[str, Any]) -> int:
    """Extract the total number of cache-creation tokens from a usage payload.

    Checks whether the new-format fields are *present* rather than whether
    their values are non-zero: if a new-format key exists, a value of 0 is
    legitimate and must NOT trigger a fallback to the legacy field.

    Args:
        usage: Usage dict; token values may be ints or int-convertible.
            New format keys: ``claude_cache_creation_5_m_tokens`` and
            ``claude_cache_creation_1_h_tokens``.
            Legacy key: ``cache_creation_input_tokens``.

    Returns:
        Total cache-creation tokens (0 when no relevant field is present).
    """
    has_new_format = (
        "claude_cache_creation_5_m_tokens" in usage
        or "claude_cache_creation_1_h_tokens" in usage
    )
    if has_new_format:
        cache_5m = usage.get("claude_cache_creation_5_m_tokens", 0)
        cache_1h = usage.get("claude_cache_creation_1_h_tokens", 0)
        return int(cache_5m) + int(cache_1h)
    # Fall back to the legacy field only when no new-format key exists.
    return int(usage.get("cache_creation_input_tokens", 0))
def build_sse_headers(extra_headers: Optional[Dict[str, str]] = None) -> Dict[str, str]:
    """Build recommended response headers for SSE (text/event-stream) responses.

    These headers reduce stutter / burst output caused by proxy buffering:

    - ``Cache-Control: no-transform`` keeps some proxies from compressing or
      rewriting the stream, which would force buffering.
    - ``X-Accel-Buffering: no`` explicitly tells Nginx to disable buffering
      (harmless even when buffering is already disabled globally).

    Args:
        extra_headers: Optional extra headers merged on top of the defaults
            (entries here override the defaults on key collision).

    Returns:
        A fresh dict mapping header names to values.
    """
    result: Dict[str, str] = {
        "Cache-Control": "no-cache, no-transform",
        "X-Accel-Buffering": "no",
    }
    if extra_headers is not None:
        result.update(extra_headers)
    return result