refactor: 重构限流系统和健康监控,支持按 API 格式区分

- 将 adaptive_concurrency 重命名为 adaptive_rpm,从并发控制改为 RPM 控制
- 健康监控器支持按 API 格式独立管理健康度和熔断器状态
- 新增 model_permissions 模块,支持按格式配置允许的模型
- 重构前端提供商相关表单组件,新增 Collapsible UI 组件
- 新增数据库迁移脚本支持新的数据结构
This commit is contained in:
fawney19
2026-01-10 18:43:53 +08:00
parent dd2fbf4424
commit 09e0f594ff
97 changed files with 6642 additions and 4169 deletions

View File

@@ -96,8 +96,6 @@ class QuotaScheduler:
logger.info(f"Resetting quota for provider {provider.name}")
provider.monthly_used_usd = 0.0
provider.rpm_used = 0 # 同时重置RPM计数
provider.rpm_reset_at = None
provider.quota_last_reset_at = now
reset_count += 1
@@ -126,8 +124,6 @@ class QuotaScheduler:
provider = db.query(Provider).filter(Provider.id == provider_id).first()
if provider and provider.billing_type == ProviderBillingType.MONTHLY_QUOTA:
provider.monthly_used_usd = 0.0
provider.rpm_used = 0
provider.rpm_reset_at = None
provider.quota_last_reset_at = now
db.commit()
logger.info(f"Force reset quota for provider {provider.name}")
@@ -140,8 +136,6 @@ class QuotaScheduler:
)
for provider in providers:
provider.monthly_used_usd = 0.0
provider.rpm_used = 0
provider.rpm_reset_at = None
provider.quota_last_reset_at = now
db.commit()
logger.info(f"Force reset quotas for {len(providers)} providers")

View File

@@ -93,6 +93,10 @@ class UsageRecorder:
if metadata.original_model and metadata.original_model != metadata.model:
target_model = metadata.model
# 非流式成功时,返回给客户端的是提供商响应头(透传)+ content-type
client_response_headers = dict(metadata.provider_response_headers) if metadata.provider_response_headers else {}
client_response_headers["content-type"] = "application/json"
await UsageService.record_usage(
db=self.db,
user=self.user,
@@ -115,6 +119,7 @@ class UsageRecorder:
request_body=request_body or result.request_body,
provider_request_headers=metadata.provider_request_headers,
response_headers=metadata.provider_response_headers,
client_response_headers=client_response_headers,
response_body=result.response_data if isinstance(result.response_data, dict) else {},
request_id=self.request_id,
provider_id=metadata.provider_id,
@@ -181,6 +186,8 @@ class UsageRecorder:
request_body=request_body or result.request_body,
provider_request_headers=metadata.provider_request_headers,
response_headers={},
# 失败请求返回给客户端的是 JSON 错误响应
client_response_headers={"content-type": "application/json"},
response_body={"error": result.error_message} if result.error_message else {},
request_id=self.request_id,
provider_id=metadata.provider_id,

View File

@@ -40,6 +40,7 @@ class UsageRecordParams:
request_body: Optional[Any]
provider_request_headers: Optional[Dict[str, Any]]
response_headers: Optional[Dict[str, Any]]
client_response_headers: Optional[Dict[str, Any]]
response_body: Optional[Any]
request_id: str
provider_id: Optional[str]
@@ -223,6 +224,7 @@ class UsageService:
request_body: Optional[Any],
provider_request_headers: Optional[Dict[str, Any]],
response_headers: Optional[Dict[str, Any]],
client_response_headers: Optional[Dict[str, Any]],
response_body: Optional[Any],
request_id: str,
provider_id: Optional[str],
@@ -288,6 +290,13 @@ class UsageService:
db, response_headers
)
# 处理返回给客户端的响应头
processed_client_response_headers = None
if should_log_headers and client_response_headers:
processed_client_response_headers = SystemConfigService.mask_sensitive_headers(
db, client_response_headers
)
# 计算真实成本(表面成本 * 倍率),免费套餐实际费用为 0
if is_free_tier:
actual_input_cost = 0.0
@@ -351,6 +360,7 @@ class UsageService:
"request_body": processed_request_body,
"provider_request_headers": processed_provider_request_headers,
"response_headers": processed_response_headers,
"client_response_headers": processed_client_response_headers,
"response_body": processed_response_body,
}
@@ -360,12 +370,13 @@ class UsageService:
db: Session,
provider_api_key_id: Optional[str],
provider_id: Optional[str],
api_format: Optional[str] = None,
) -> Tuple[float, bool]:
"""获取费率倍数和是否免费套餐(使用缓存)"""
from src.services.cache.provider_cache import ProviderCacheService
return await ProviderCacheService.get_rate_multiplier_and_free_tier(
db, provider_api_key_id, provider_id
db, provider_api_key_id, provider_id, api_format
)
@classmethod
@@ -484,6 +495,7 @@ class UsageService:
existing_usage.provider_request_headers = usage_params["provider_request_headers"]
existing_usage.response_body = usage_params["response_body"]
existing_usage.response_headers = usage_params["response_headers"]
existing_usage.client_response_headers = usage_params["client_response_headers"]
# 更新 token 和费用信息
existing_usage.input_tokens = usage_params["input_tokens"]
@@ -656,9 +668,9 @@ class UsageService:
Returns:
(usage_params 字典, total_cost 总成本)
"""
# 获取费率倍数和是否免费套餐
# 获取费率倍数和是否免费套餐(传递 api_format 支持按格式配置的倍率)
actual_rate_multiplier, is_free_tier = await cls._get_rate_multiplier_and_free_tier(
params.db, params.provider_api_key_id, params.provider_id
params.db, params.provider_api_key_id, params.provider_id, params.api_format
)
# 计算成本
@@ -704,6 +716,7 @@ class UsageService:
request_body=params.request_body,
provider_request_headers=params.provider_request_headers,
response_headers=params.response_headers,
client_response_headers=params.client_response_headers,
response_body=params.response_body,
request_id=params.request_id,
provider_id=params.provider_id,
@@ -753,6 +766,7 @@ class UsageService:
request_body: Optional[Any] = None,
provider_request_headers: Optional[Dict[str, Any]] = None,
response_headers: Optional[Dict[str, Any]] = None,
client_response_headers: Optional[Dict[str, Any]] = None,
response_body: Optional[Any] = None,
request_id: Optional[str] = None,
provider_id: Optional[str] = None,
@@ -785,7 +799,8 @@ class UsageService:
status_code=status_code, error_message=error_message, metadata=metadata,
request_headers=request_headers, request_body=request_body,
provider_request_headers=provider_request_headers,
response_headers=response_headers, response_body=response_body,
response_headers=response_headers, client_response_headers=client_response_headers,
response_body=response_body,
request_id=request_id, provider_id=provider_id,
provider_endpoint_id=provider_endpoint_id,
provider_api_key_id=provider_api_key_id, status=status,
@@ -844,6 +859,7 @@ class UsageService:
request_body: Optional[Any] = None,
provider_request_headers: Optional[Dict[str, Any]] = None,
response_headers: Optional[Dict[str, Any]] = None,
client_response_headers: Optional[Dict[str, Any]] = None,
response_body: Optional[Any] = None,
request_id: Optional[str] = None,
provider_id: Optional[str] = None,
@@ -878,7 +894,8 @@ class UsageService:
status_code=status_code, error_message=error_message, metadata=metadata,
request_headers=request_headers, request_body=request_body,
provider_request_headers=provider_request_headers,
response_headers=response_headers, response_body=response_body,
response_headers=response_headers, client_response_headers=client_response_headers,
response_body=response_body,
request_id=request_id, provider_id=provider_id,
provider_endpoint_id=provider_endpoint_id,
provider_api_key_id=provider_api_key_id, status=status,