refactor(handler): optimize stream processing and telemetry pipeline

- Enhance stream context for better token and latency tracking
- Refactor stream processor for improved performance metrics
- Improve telemetry integration with first_byte_time_ms support
- Add comprehensive stream context unit tests
This commit is contained in:
fawney19
2025-12-16 02:39:03 +08:00
parent 9b496abb73
commit ad1c8c394c
8 changed files with 428 additions and 108 deletions

View File

@@ -0,0 +1,117 @@
from src.api.handlers.base import stream_context
from src.api.handlers.base.stream_context import StreamContext
def test_collected_text_append_and_property() -> None:
ctx = StreamContext(model="test-model", api_format="OPENAI")
assert ctx.collected_text == ""
ctx.append_text("hello")
ctx.append_text(" ")
ctx.append_text("world")
assert ctx.collected_text == "hello world"
def test_reset_for_retry_clears_state() -> None:
ctx = StreamContext(model="test-model", api_format="OPENAI")
ctx.append_text("x")
ctx.update_usage(input_tokens=10, output_tokens=5)
ctx.parsed_chunks.append({"type": "chunk"})
ctx.chunk_count = 3
ctx.data_count = 2
ctx.has_completion = True
ctx.status_code = 418
ctx.error_message = "boom"
ctx.reset_for_retry()
assert ctx.collected_text == ""
assert ctx.input_tokens == 0
assert ctx.output_tokens == 0
assert ctx.parsed_chunks == []
assert ctx.chunk_count == 0
assert ctx.data_count == 0
assert ctx.has_completion is False
assert ctx.status_code == 200
assert ctx.error_message is None
def test_record_first_byte_time(monkeypatch) -> None:
"""测试记录首字时间"""
ctx = StreamContext(model="claude-3", api_format="claude_messages")
start_time = 100.0
monkeypatch.setattr(stream_context.time, "time", lambda: 100.0123) # 12.3ms
# 记录首字时间
ctx.record_first_byte_time(start_time)
# 验证首字时间已记录
assert ctx.first_byte_time_ms == 12
def test_record_first_byte_time_idempotent(monkeypatch) -> None:
"""测试首字时间只记录一次"""
ctx = StreamContext(model="claude-3", api_format="claude_messages")
start_time = 100.0
# 第一次记录
monkeypatch.setattr(stream_context.time, "time", lambda: 100.010)
ctx.record_first_byte_time(start_time)
first_value = ctx.first_byte_time_ms
# 第二次记录(应该被忽略)
monkeypatch.setattr(stream_context.time, "time", lambda: 100.020)
ctx.record_first_byte_time(start_time)
second_value = ctx.first_byte_time_ms
# 验证值没有改变
assert first_value == second_value
def test_reset_for_retry_clears_first_byte_time(monkeypatch) -> None:
"""测试重试时清除首字时间"""
ctx = StreamContext(model="claude-3", api_format="claude_messages")
start_time = 100.0
# 记录首字时间
monkeypatch.setattr(stream_context.time, "time", lambda: 100.010)
ctx.record_first_byte_time(start_time)
assert ctx.first_byte_time_ms is not None
# 重置
ctx.reset_for_retry()
# 验证首字时间已清除
assert ctx.first_byte_time_ms is None
def test_get_log_summary_with_first_byte_time() -> None:
"""测试日志摘要包含首字时间"""
ctx = StreamContext(model="claude-3", api_format="claude_messages")
ctx.provider_name = "anthropic"
ctx.input_tokens = 100
ctx.output_tokens = 50
ctx.first_byte_time_ms = 123
summary = ctx.get_log_summary("request-id-123", 456)
# 验证包含首字时间和总时间(大写格式)
assert "TTFB: 123ms" in summary
assert "Total: 456ms" in summary
assert "in:100 out:50" in summary
def test_get_log_summary_without_first_byte_time() -> None:
"""测试日志摘要在没有首字时间时的格式"""
ctx = StreamContext(model="claude-3", api_format="claude_messages")
ctx.provider_name = "anthropic"
ctx.input_tokens = 100
ctx.output_tokens = 50
# first_byte_time_ms 保持为 None
summary = ctx.get_log_summary("request-id-123", 456)
# 验证不包含首字时间标记,但有总时间(使用大写 TTFB 和 Total
assert "TTFB:" not in summary
assert "Total: 456ms" in summary
assert "in:100 out:50" in summary

View File

@@ -2,7 +2,7 @@
import pytest
from src.api.handlers.base.utils import extract_cache_creation_tokens
from src.api.handlers.base.utils import build_sse_headers, extract_cache_creation_tokens
class TestExtractCacheCreationTokens:
@@ -69,14 +69,16 @@ class TestExtractCacheCreationTokens:
}
assert extract_cache_creation_tokens(usage) == 123
def test_new_format_zero_fallback_to_old(self) -> None:
"""测试新格式为 0 时回退到旧格式"""
def test_new_format_zero_should_not_fallback(self) -> None:
"""测试新格式字段存在但为 0 时,不应 fallback 到旧格式"""
usage = {
"claude_cache_creation_5_m_tokens": 0,
"claude_cache_creation_1_h_tokens": 0,
"cache_creation_input_tokens": 456,
}
assert extract_cache_creation_tokens(usage) == 456
# 新格式字段存在,即使值为 0 也应该使用新格式(返回 0
# 而不是 fallback 到旧格式(返回 456
assert extract_cache_creation_tokens(usage) == 0
def test_unrelated_fields_ignored(self) -> None:
"""测试忽略无关字段"""
@@ -88,3 +90,15 @@ class TestExtractCacheCreationTokens:
"claude_cache_creation_1_h_tokens": 75,
}
assert extract_cache_creation_tokens(usage) == 125
class TestBuildSSEHeaders:
def test_default_headers(self) -> None:
headers = build_sse_headers()
assert headers["Cache-Control"] == "no-cache, no-transform"
assert headers["X-Accel-Buffering"] == "no"
def test_merge_extra_headers(self) -> None:
headers = build_sse_headers({"X-Test": "1", "Cache-Control": "custom"})
assert headers["X-Test"] == "1"
assert headers["Cache-Control"] == "custom"