6 Commits

Author SHA1 Message Date
fawney19
2fa64b98e3 fix: deploy.sh 将 Dockerfile.app.local 纳入代码变化检测 2025-12-24 18:10:42 +08:00
fawney19
75d7e89cbb perf: 添加 gunicorn --preload 参数优化内存占用
Worker 进程共享只读内存(代码、常量),可减少约 30-40% 内存占用

Closes #44
2025-12-24 18:10:42 +08:00
fawney19
d73a443484 fix: 修复初次执行 migrate.sh 时 usage 表不存在的问题 (#43)
- 在 baseline 中直接创建 usage 表复合索引
- 在后续迁移中添加表存在性检查,避免 AUTOCOMMIT 连接看不到事务中的表
2025-12-24 18:10:42 +08:00
Hwwwww-dev
15a9b88fc8 feat: enhance extract_cache_creation_tokens function to support three formats[#41] (#42)
- Updated the function to prioritize nested format, followed by flat new format, and finally old format for cache creation tokens.
- Added fallback logic for cases where the preferred formats return zero.
- Expanded unit tests to cover new format scenarios and ensure proper functionality across all formats.

Co-authored-by: heweimin <heweimin@retaileye.ai>
2025-12-24 01:31:45 +08:00
fawney19
03eb7203ec fix(api): 同步 chat_handler_base 使用 aiter_bytes 支持自动解压 2025-12-24 01:13:35 +08:00
hank9999
e38cd6819b fix(api): 优化字节流迭代器以支持自动解压 gzip (#39) 2025-12-24 01:11:35 +08:00
9 changed files with 173 additions and 67 deletions

View File

@@ -105,7 +105,7 @@ RUN printf '%s\n' \
'stderr_logfile=/var/log/nginx/error.log' \
'' \
'[program:app]' \
'command=gunicorn src.main:app -w %(ENV_GUNICORN_WORKERS)s -k uvicorn.workers.UvicornWorker --bind 0.0.0.0:%(ENV_PORT)s --timeout 120 --access-logfile - --error-logfile - --log-level info' \
'command=gunicorn src.main:app --preload -w %(ENV_GUNICORN_WORKERS)s -k uvicorn.workers.UvicornWorker --bind 0.0.0.0:%(ENV_PORT)s --timeout 120 --access-logfile - --error-logfile - --log-level info' \
'directory=/app' \
'autostart=true' \
'autorestart=true' \

View File

@@ -106,7 +106,7 @@ RUN printf '%s\n' \
'stderr_logfile=/var/log/nginx/error.log' \
'' \
'[program:app]' \
'command=gunicorn src.main:app -w %(ENV_GUNICORN_WORKERS)s -k uvicorn.workers.UvicornWorker --bind 0.0.0.0:%(ENV_PORT)s --timeout 120 --access-logfile - --error-logfile - --log-level info' \
'command=gunicorn src.main:app --preload -w %(ENV_GUNICORN_WORKERS)s -k uvicorn.workers.UvicornWorker --bind 0.0.0.0:%(ENV_PORT)s --timeout 120 --access-logfile - --error-logfile - --log-level info' \
'directory=/app' \
'autostart=true' \
'autorestart=true' \

View File

@@ -394,6 +394,10 @@ def upgrade() -> None:
index=True,
),
)
# usage 表复合索引(优化常见查询)
op.create_index("idx_usage_user_created", "usage", ["user_id", "created_at"])
op.create_index("idx_usage_apikey_created", "usage", ["api_key_id", "created_at"])
op.create_index("idx_usage_provider_model_created", "usage", ["provider", "model", "created_at"])
# ==================== user_quotas ====================
op.create_table(

View File

@@ -20,12 +20,28 @@ def upgrade() -> None:
使用 CONCURRENTLY 创建索引以避免锁表,
但需要在 AUTOCOMMIT 模式下执行(不能在事务内)
注意:如果是从全新数据库执行(baseline 刚创建表),
由于 AUTOCOMMIT 连接看不到事务中未提交的表,会跳过索引创建。
这种情况下索引会在下次迁移或手动创建。
"""
conn = op.get_bind()
engine = conn.engine
# 使用新连接并设置 AUTOCOMMIT 模式以支持 CREATE INDEX CONCURRENTLY
with engine.connect().execution_options(isolation_level="AUTOCOMMIT") as autocommit_conn:
# 检查 usage 表是否存在(在 AUTOCOMMIT 连接中可见)
# 如果表不存在(例如 baseline 迁移还在事务中),跳过索引创建
result = autocommit_conn.execute(text(
"SELECT EXISTS (SELECT FROM information_schema.tables WHERE table_name = 'usage')"
))
table_exists = result.scalar()
if not table_exists:
# 表在当前连接不可见(可能 baseline 还在事务中),跳过
# 索引将通过后续迁移或手动创建
return
# 使用 IF NOT EXISTS 避免重复创建,无需单独检查索引是否存在
# 1. user_id + created_at 复合索引 (用户用量查询)

View File

@@ -26,10 +26,13 @@ calc_deps_hash() {
cat pyproject.toml frontend/package.json frontend/package-lock.json Dockerfile.base.local 2>/dev/null | md5sum | cut -d' ' -f1
}
# 计算代码文件的哈希值
# 计算代码文件的哈希值(包含 Dockerfile.app.local)
calc_code_hash() {
find src -type f -name "*.py" 2>/dev/null | sort | xargs cat 2>/dev/null | md5sum | cut -d' ' -f1
find frontend/src -type f \( -name "*.vue" -o -name "*.ts" -o -name "*.tsx" -o -name "*.js" \) 2>/dev/null | sort | xargs cat 2>/dev/null | md5sum | cut -d' ' -f1
{
cat Dockerfile.app.local 2>/dev/null
find src -type f -name "*.py" 2>/dev/null | sort | xargs cat 2>/dev/null
find frontend/src -type f \( -name "*.vue" -o -name "*.ts" -o -name "*.tsx" -o -name "*.js" \) 2>/dev/null | sort | xargs cat 2>/dev/null
} | md5sum | cut -d' ' -f1
}
# 计算迁移文件的哈希值

View File

@@ -484,9 +484,8 @@ class ChatHandlerBase(BaseMessageHandler, ABC):
stream_response.raise_for_status()
# 使用字节流迭代器(避免 aiter_lines 的性能问题)
# aiter_raw() 返回原始数据块,无缓冲,实现真正的流式传输
byte_iterator = stream_response.aiter_raw()
# 使用字节流迭代器(避免 aiter_lines 的性能问题,aiter_bytes 会自动解压 gzip/deflate)
byte_iterator = stream_response.aiter_bytes()
# 预读检测嵌套错误
prefetched_chunks = await stream_processor.prefetch_and_check_error(

View File

@@ -476,8 +476,8 @@ class CliMessageHandlerBase(BaseMessageHandler):
stream_response.raise_for_status()
# 使用字节流迭代器(避免 aiter_lines 的性能问题)
byte_iterator = stream_response.aiter_raw()
# 使用字节流迭代器(避免 aiter_lines 的性能问题,aiter_bytes 会自动解压 gzip/deflate)
byte_iterator = stream_response.aiter_bytes()
# 预读第一个数据块检测嵌套错误(HTTP 200 但响应体包含错误)
prefetched_chunks = await self._prefetch_and_check_embedded_error(
@@ -531,7 +531,7 @@ class CliMessageHandlerBase(BaseMessageHandler):
# 检查是否需要格式转换
needs_conversion = self._needs_format_conversion(ctx)
async for chunk in stream_response.aiter_raw():
async for chunk in stream_response.aiter_bytes():
# 在第一次输出数据前更新状态为 streaming
if not streaming_status_updated:
self._update_usage_to_streaming_with_ctx(ctx)

View File

@@ -4,17 +4,28 @@ Handler 基础工具函数
from typing import Any, Dict, Optional
from src.core.logger import logger
def extract_cache_creation_tokens(usage: Dict[str, Any]) -> int:
"""
提取缓存创建 tokens(兼容新旧格式)
提取缓存创建 tokens(兼容三种格式)
Claude API 在不同版本中使用了不同的字段名来表示缓存创建 tokens:
- 新格式2024年后使用 claude_cache_creation_5_m_tokens 和
claude_cache_creation_1_h_tokens 分别表示 5 分钟和 1 小时缓存
- 旧格式:使用 cache_creation_input_tokens 表示总的缓存创建 tokens
根据 Anthropic API 文档,支持三种格式(按优先级):
此函数自动检测并适配两种格式,优先使用新格式。
1. **嵌套格式优先级最高)**
usage.cache_creation.ephemeral_5m_input_tokens
usage.cache_creation.ephemeral_1h_input_tokens
2. **扁平新格式(优先级第二)**
usage.claude_cache_creation_5_m_tokens
usage.claude_cache_creation_1_h_tokens
3. **旧格式(优先级第三)**
usage.cache_creation_input_tokens
优先使用嵌套格式,如果嵌套格式字段存在但值为 0则智能 fallback 到旧格式。
扁平格式和嵌套格式互斥,按顺序检查。
Args:
usage: API 响应中的 usage 字典
@@ -22,20 +33,63 @@ def extract_cache_creation_tokens(usage: Dict[str, Any]) -> int:
Returns:
缓存创建 tokens 总数
"""
# 检查新格式字段是否存在(而非值是否为 0)
# 如果字段存在,即使值为 0 也是合法的,不应 fallback 到旧格式
has_new_format = (
# 1. 检查嵌套格式(最新格式)
cache_creation = usage.get("cache_creation")
if isinstance(cache_creation, dict):
cache_5m = int(cache_creation.get("ephemeral_5m_input_tokens", 0))
cache_1h = int(cache_creation.get("ephemeral_1h_input_tokens", 0))
total = cache_5m + cache_1h
if total > 0:
logger.debug(
f"Using nested cache_creation: 5m={cache_5m}, 1h={cache_1h}, total={total}"
)
return total
# 嵌套格式存在但为 0,fallback 到旧格式
old_format = int(usage.get("cache_creation_input_tokens", 0))
if old_format > 0:
logger.debug(
f"Nested cache_creation is 0, using old format: {old_format}"
)
return old_format
# 都是 0,返回 0
return 0
# 2. 检查扁平新格式
has_flat_format = (
"claude_cache_creation_5_m_tokens" in usage
or "claude_cache_creation_1_h_tokens" in usage
)
if has_new_format:
cache_5m = usage.get("claude_cache_creation_5_m_tokens", 0)
cache_1h = usage.get("claude_cache_creation_1_h_tokens", 0)
return int(cache_5m) + int(cache_1h)
if has_flat_format:
cache_5m = int(usage.get("claude_cache_creation_5_m_tokens", 0))
cache_1h = int(usage.get("claude_cache_creation_1_h_tokens", 0))
total = cache_5m + cache_1h
# 回退到旧格式
return int(usage.get("cache_creation_input_tokens", 0))
if total > 0:
logger.debug(
f"Using flat new format: 5m={cache_5m}, 1h={cache_1h}, total={total}"
)
return total
# 扁平格式存在但为 0,fallback 到旧格式
old_format = int(usage.get("cache_creation_input_tokens", 0))
if old_format > 0:
logger.debug(
f"Flat cache_creation is 0, using old format: {old_format}"
)
return old_format
# 都是 0,返回 0
return 0
# 3. 回退到旧格式
old_format = int(usage.get("cache_creation_input_tokens", 0))
if old_format > 0:
logger.debug(f"Using old format: cache_creation_input_tokens={old_format}")
return old_format
def build_sse_headers(extra_headers: Optional[Dict[str, str]] = None) -> Dict[str, str]:

View File

@@ -8,86 +8,116 @@ from src.api.handlers.base.utils import build_sse_headers, extract_cache_creatio
class TestExtractCacheCreationTokens:
"""测试 extract_cache_creation_tokens 函数"""
def test_new_format_only(self) -> None:
"""测试只有新格式字段"""
# === 嵌套格式测试(优先级最高)===
def test_nested_cache_creation_format(self) -> None:
"""测试嵌套格式正常情况"""
usage = {
"cache_creation": {
"ephemeral_5m_input_tokens": 456,
"ephemeral_1h_input_tokens": 100,
}
}
assert extract_cache_creation_tokens(usage) == 556
def test_nested_cache_creation_with_old_format_fallback(self) -> None:
"""测试嵌套格式为 0 时回退到旧格式"""
usage = {
"cache_creation": {
"ephemeral_5m_input_tokens": 0,
"ephemeral_1h_input_tokens": 0,
},
"cache_creation_input_tokens": 549,
}
assert extract_cache_creation_tokens(usage) == 549
def test_nested_has_priority_over_flat(self) -> None:
"""测试嵌套格式优先于扁平格式"""
usage = {
"cache_creation": {
"ephemeral_5m_input_tokens": 100,
"ephemeral_1h_input_tokens": 200,
},
"claude_cache_creation_5_m_tokens": 999, # 应该被忽略
"claude_cache_creation_1_h_tokens": 888, # 应该被忽略
"cache_creation_input_tokens": 777, # 应该被忽略
}
assert extract_cache_creation_tokens(usage) == 300
# === 扁平格式测试(优先级第二)===
def test_flat_new_format_still_works(self) -> None:
"""测试扁平新格式兼容性"""
usage = {
"claude_cache_creation_5_m_tokens": 100,
"claude_cache_creation_1_h_tokens": 200,
}
assert extract_cache_creation_tokens(usage) == 300
def test_new_format_5m_only(self) -> None:
"""测试只有 5 分钟缓存"""
def test_flat_new_format_with_old_format_fallback(self) -> None:
"""测试扁平格式为 0 时回退到旧格式"""
usage = {
"claude_cache_creation_5_m_tokens": 0,
"claude_cache_creation_1_h_tokens": 0,
"cache_creation_input_tokens": 549,
}
assert extract_cache_creation_tokens(usage) == 549
def test_flat_new_format_5m_only(self) -> None:
"""测试只有 5 分钟扁平缓存"""
usage = {
"claude_cache_creation_5_m_tokens": 150,
"claude_cache_creation_1_h_tokens": 0,
}
assert extract_cache_creation_tokens(usage) == 150
def test_new_format_1h_only(self) -> None:
"""测试只有 1 小时缓存"""
def test_flat_new_format_1h_only(self) -> None:
"""测试只有 1 小时扁平缓存"""
usage = {
"claude_cache_creation_5_m_tokens": 0,
"claude_cache_creation_1_h_tokens": 250,
}
assert extract_cache_creation_tokens(usage) == 250
# === 旧格式测试(优先级第三)===
def test_old_format_only(self) -> None:
"""测试只有旧格式字段"""
"""测试只有旧格式"""
usage = {
"cache_creation_input_tokens": 500,
"cache_creation_input_tokens": 549,
}
assert extract_cache_creation_tokens(usage) == 500
assert extract_cache_creation_tokens(usage) == 549
def test_both_formats_prefers_new(self) -> None:
"""测试同时存在时优先使用新格式"""
usage = {
"claude_cache_creation_5_m_tokens": 100,
"claude_cache_creation_1_h_tokens": 200,
"cache_creation_input_tokens": 999, # 应该被忽略
}
assert extract_cache_creation_tokens(usage) == 300
# === 边界情况测试 ===
def test_empty_usage(self) -> None:
"""测试空字典"""
def test_no_cache_creation_tokens(self) -> None:
"""测试没有任何缓存字段"""
usage = {}
assert extract_cache_creation_tokens(usage) == 0
def test_all_zeros(self) -> None:
"""测试所有字段都为 0"""
def test_all_formats_zero(self) -> None:
"""测试所有格式都为 0"""
usage = {
"cache_creation": {
"ephemeral_5m_input_tokens": 0,
"ephemeral_1h_input_tokens": 0,
},
"claude_cache_creation_5_m_tokens": 0,
"claude_cache_creation_1_h_tokens": 0,
"cache_creation_input_tokens": 0,
}
assert extract_cache_creation_tokens(usage) == 0
def test_partial_new_format_with_old_format_fallback(self) -> None:
"""测试新格式字段不存在时回退到旧格式"""
usage = {
"cache_creation_input_tokens": 123,
}
assert extract_cache_creation_tokens(usage) == 123
def test_new_format_zero_should_not_fallback(self) -> None:
"""测试新格式字段存在但为 0 时,不应 fallback 到旧格式"""
usage = {
"claude_cache_creation_5_m_tokens": 0,
"claude_cache_creation_1_h_tokens": 0,
"cache_creation_input_tokens": 456,
}
# 新格式字段存在,即使值为 0 也应该使用新格式(返回 0)
# 而不是 fallback 到旧格式(返回 456)
assert extract_cache_creation_tokens(usage) == 0
def test_unrelated_fields_ignored(self) -> None:
"""测试忽略无关字段"""
usage = {
"input_tokens": 1000,
"output_tokens": 2000,
"cache_read_input_tokens": 300,
"claude_cache_creation_5_m_tokens": 50,
"claude_cache_creation_1_h_tokens": 75,
"cache_creation": {
"ephemeral_5m_input_tokens": 50,
"ephemeral_1h_input_tokens": 75,
},
}
assert extract_cache_creation_tokens(usage) == 125