From b202765be41c34a58d173db697bd52995f286ef3 Mon Sep 17 00:00:00 2001 From: fawney19 Date: Mon, 5 Jan 2026 00:13:23 +0800 Subject: [PATCH] =?UTF-8?q?perf:=20=E4=BC=98=E5=8C=96=E6=B5=81=E5=BC=8F?= =?UTF-8?q?=E5=93=8D=E5=BA=94=20TTFB=EF=BC=8C=E5=B0=86=E6=95=B0=E6=8D=AE?= =?UTF-8?q?=E5=BA=93=E7=8A=B6=E6=80=81=E6=9B=B4=E6=96=B0=E7=A7=BB=E8=87=B3?= =?UTF-8?q?=20yield=20=E5=90=8E=E6=89=A7=E8=A1=8C?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - StreamUsageTracker: 先 yield 首个 chunk 再更新 streaming 状态 - EnhancedStreamUsageTracker: 同步添加 TTFB 记录和状态更新逻辑 - 确保客户端首字节响应不受数据库操作延迟影响 --- src/services/usage/stream.py | 61 ++++++++++++++++++++++++------------ 1 file changed, 41 insertions(+), 20 deletions(-) diff --git a/src/services/usage/stream.py b/src/services/usage/stream.py index 753f350..8fef64f 100644 --- a/src/services/usage/stream.py +++ b/src/services/usage/stream.py @@ -459,34 +459,35 @@ class StreamUsageTracker: logger.debug(f"ID:{self.request_id} | 开始跟踪流式响应 | 估算输入tokens:{self.input_tokens}") chunk_count = 0 - first_chunk_received = False + first_byte_time_ms = None # 预先记录 TTFB,避免 yield 后计算不准确 try: async for chunk in stream: chunk_count += 1 # 保存原始字节流(用于错误诊断) self.raw_chunks.append(chunk) - # 第一个 chunk 收到时,更新状态为 streaming 并记录 TTFB - if not first_chunk_received: - first_chunk_received = True - if self.request_id: - try: - # 计算 TTFB(使用请求原始开始时间或 track_stream 开始时间) - base_time = self.request_start_time or self.start_time - first_byte_time_ms = int((time.time() - base_time) * 1000) if base_time else None - UsageService.update_usage_status( - db=self.db, - request_id=self.request_id, - status="streaming", - provider=self.provider, - first_byte_time_ms=first_byte_time_ms, - ) - except Exception as e: - logger.warning(f"更新使用记录状态为 streaming 失败: {e}") + # 第一个 chunk 收到时,记录 TTFB 时间点(但先不更新数据库,避免阻塞) + if chunk_count == 1: + # 计算 TTFB(使用请求原始开始时间或 track_stream 开始时间) + base_time = self.request_start_time or self.start_time + first_byte_time_ms = int((time.time() - base_time) * 1000) if base_time else None - # 返回原始块给客户端 + # 先返回原始块给客户端,确保 TTFB 不受数据库操作影响 yield chunk + # yield 后再更新数据库状态(仅第一个 chunk 时执行) + if chunk_count == 1 and self.request_id: + try: + UsageService.update_usage_status( + db=self.db, + request_id=self.request_id, + status="streaming", + provider=self.provider, + first_byte_time_ms=first_byte_time_ms, + ) + except Exception as e: + logger.warning(f"更新使用记录状态为 streaming 失败: {e}") + # 解析块以提取内容和使用信息(chunk是原始字节) content, usage = self.parse_stream_chunk(chunk) @@ -916,15 +917,35 @@ class EnhancedStreamUsageTracker(StreamUsageTracker): logger.debug(f"ID:{self.request_id} | 开始跟踪流式响应(Enhanced) | 估算输入tokens:{self.input_tokens}") chunk_count = 0 + first_byte_time_ms = None # 预先记录 TTFB,避免 yield 后计算不准确 try: async for chunk in stream: chunk_count += 1 # 保存原始字节流(用于错误诊断) self.raw_chunks.append(chunk) - # 返回原始块给客户端 + # 第一个 chunk 收到时,记录 TTFB 时间点(但先不更新数据库,避免阻塞) + if chunk_count == 1: + # 计算 TTFB(使用请求原始开始时间或 track_stream 开始时间) + base_time = self.request_start_time or self.start_time + first_byte_time_ms = int((time.time() - base_time) * 1000) if base_time else None + + # 先返回原始块给客户端,确保 TTFB 不受数据库操作影响 yield chunk + # yield 后再更新数据库状态(仅第一个 chunk 时执行) + if chunk_count == 1 and self.request_id: + try: + UsageService.update_usage_status( + db=self.db, + request_id=self.request_id, + status="streaming", + provider=self.provider, + first_byte_time_ms=first_byte_time_ms, + ) + except Exception as e: + logger.warning(f"更新使用记录状态为 streaming 失败: {e}") + # 解析块以提取内容和使用信息(chunk是原始字节) content, usage = self.parse_stream_chunk(chunk)