refactor(handler): optimize stream processing and telemetry pipeline

- Enhance stream context for better token and latency tracking
- Refactor stream processor for improved performance metrics
- Improve telemetry integration with first_byte_time_ms support
- Add comprehensive stream context unit tests

This commit is contained in:
fawney19
2025-12-16 02:39:03 +08:00
parent 9b496abb73
commit ad1c8c394c
8 changed files with 428 additions and 108 deletions

View File

@@ -2,7 +2,7 @@
Handler 基础工具函数
"""
from typing import Any, Dict
from typing import Any, Dict, Optional
def extract_cache_creation_tokens(usage: Dict[str, Any]) -> int:
    """Extract the total number of cache-creation tokens from a usage payload.

    Checks whether the new-format fields are *present* rather than whether
    their values are non-zero: if a new-format key exists, a value of 0 is
    legitimate and must NOT trigger a fallback to the legacy field.

    Args:
        usage: Usage dict; token values may be ints or int-convertible.
            New format keys: ``claude_cache_creation_5_m_tokens`` and
            ``claude_cache_creation_1_h_tokens``.
            Legacy key: ``cache_creation_input_tokens``.

    Returns:
        Total cache-creation tokens (0 when no relevant field is present).
    """
    has_new_format = (
        "claude_cache_creation_5_m_tokens" in usage
        or "claude_cache_creation_1_h_tokens" in usage
    )
    if has_new_format:
        cache_5m = usage.get("claude_cache_creation_5_m_tokens", 0)
        cache_1h = usage.get("claude_cache_creation_1_h_tokens", 0)
        return int(cache_5m) + int(cache_1h)
    # Fall back to the legacy field only when no new-format key exists.
    return int(usage.get("cache_creation_input_tokens", 0))
def build_sse_headers(extra_headers: Optional[Dict[str, str]] = None) -> Dict[str, str]:
    """Build recommended response headers for SSE (text/event-stream) responses.

    These headers reduce stutter / burst output caused by proxy buffering:

    - ``Cache-Control: no-transform`` keeps some proxies from compressing or
      rewriting the stream, which would force buffering.
    - ``X-Accel-Buffering: no`` explicitly tells Nginx to disable buffering
      (harmless even when buffering is already disabled globally).

    Args:
        extra_headers: Optional extra headers merged on top of the defaults
            (entries here override the defaults on key collision).

    Returns:
        A fresh dict mapping header names to values.
    """
    result: Dict[str, str] = {
        "Cache-Control": "no-cache, no-transform",
        "X-Accel-Buffering": "no",
    }
    if extra_headers is not None:
        result.update(extra_headers)
    return result