mirror of
https://github.com/fawney19/Aether.git
synced 2026-01-03 00:02:28 +08:00
fix: improve error classification and logging system
- Enhance error classifier to properly handle API key failures with fallback support
- Add error reason/code parsing for better AWS and multi-provider compatibility
- Improve error message structure detection for non-standard formats
- Refactor file logging with size-based rotation (100MB) instead of daily rotation
- Optimize production logging by disabling backtrace and diagnose
- Clean up model validation and remove redundant configurations
This commit is contained in:
@@ -22,7 +22,7 @@
|
|||||||
/>
|
/>
|
||||||
</Transition>
|
</Transition>
|
||||||
|
|
||||||
<div class="relative flex min-h-full items-end justify-center p-4 text-center sm:items-center sm:p-0">
|
<div class="relative flex min-h-full items-end justify-center p-4 text-center sm:items-center sm:p-0 pointer-events-none">
|
||||||
<!-- 对话框内容 -->
|
<!-- 对话框内容 -->
|
||||||
<Transition
|
<Transition
|
||||||
enter-active-class="duration-300 ease-out"
|
enter-active-class="duration-300 ease-out"
|
||||||
|
|||||||
@@ -411,9 +411,10 @@ class BaseMessageHandler:
|
|||||||
QuotaExceededException,
|
QuotaExceededException,
|
||||||
RateLimitException,
|
RateLimitException,
|
||||||
ModelNotSupportedException,
|
ModelNotSupportedException,
|
||||||
|
UpstreamClientException,
|
||||||
)
|
)
|
||||||
|
|
||||||
if isinstance(error, (ProviderException, QuotaExceededException, RateLimitException, ModelNotSupportedException)):
|
if isinstance(error, (ProviderException, QuotaExceededException, RateLimitException, ModelNotSupportedException, UpstreamClientException)):
|
||||||
# 业务异常:简洁日志,不打印堆栈
|
# 业务异常:简洁日志,不打印堆栈
|
||||||
logger.error(f"{message}: [{type(error).__name__}] {error}")
|
logger.error(f"{message}: [{type(error).__name__}] {error}")
|
||||||
else:
|
else:
|
||||||
|
|||||||
@@ -41,8 +41,8 @@ class CacheSize:
|
|||||||
class ConcurrencyDefaults:
|
class ConcurrencyDefaults:
|
||||||
"""并发控制默认值"""
|
"""并发控制默认值"""
|
||||||
|
|
||||||
# 自适应并发初始限制(保守值)
|
# 自适应并发初始限制(宽松起步,遇到 429 再降低)
|
||||||
INITIAL_LIMIT = 3
|
INITIAL_LIMIT = 50
|
||||||
|
|
||||||
# 429错误后的冷却时间(分钟)- 在此期间不会增加并发限制
|
# 429错误后的冷却时间(分钟)- 在此期间不会增加并发限制
|
||||||
COOLDOWN_AFTER_429_MINUTES = 5
|
COOLDOWN_AFTER_429_MINUTES = 5
|
||||||
@@ -67,13 +67,14 @@ class ConcurrencyDefaults:
|
|||||||
MIN_SAMPLES_FOR_DECISION = 5
|
MIN_SAMPLES_FOR_DECISION = 5
|
||||||
|
|
||||||
# 扩容步长 - 每次扩容增加的并发数
|
# 扩容步长 - 每次扩容增加的并发数
|
||||||
INCREASE_STEP = 1
|
INCREASE_STEP = 2
|
||||||
|
|
||||||
# 缩容乘数 - 遇到 429 时的缩容比例
|
# 缩容乘数 - 遇到 429 时基于当前并发数的缩容比例
|
||||||
DECREASE_MULTIPLIER = 0.7
|
# 0.85 表示降到触发 429 时并发数的 85%
|
||||||
|
DECREASE_MULTIPLIER = 0.85
|
||||||
|
|
||||||
# 最大并发限制上限
|
# 最大并发限制上限
|
||||||
MAX_CONCURRENT_LIMIT = 100
|
MAX_CONCURRENT_LIMIT = 200
|
||||||
|
|
||||||
# 最小并发限制下限
|
# 最小并发限制下限
|
||||||
MIN_CONCURRENT_LIMIT = 1
|
MIN_CONCURRENT_LIMIT = 1
|
||||||
@@ -85,6 +86,11 @@ class ConcurrencyDefaults:
|
|||||||
# 探测性扩容最小请求数 - 在探测间隔内至少需要这么多请求
|
# 探测性扩容最小请求数 - 在探测间隔内至少需要这么多请求
|
||||||
PROBE_INCREASE_MIN_REQUESTS = 10
|
PROBE_INCREASE_MIN_REQUESTS = 10
|
||||||
|
|
||||||
|
# === 缓存用户预留比例 ===
|
||||||
|
# 缓存用户槽位预留比例(新用户可用 1 - 此值)
|
||||||
|
# 0.1 表示缓存用户预留 10%,新用户可用 90%
|
||||||
|
CACHE_RESERVATION_RATIO = 0.1
|
||||||
|
|
||||||
|
|
||||||
class CircuitBreakerDefaults:
|
class CircuitBreakerDefaults:
|
||||||
"""熔断器配置默认值(滑动窗口 + 半开状态模式)
|
"""熔断器配置默认值(滑动窗口 + 半开状态模式)
|
||||||
|
|||||||
@@ -122,9 +122,9 @@ class Config:
|
|||||||
|
|
||||||
# 并发控制配置
|
# 并发控制配置
|
||||||
# CONCURRENCY_SLOT_TTL: 并发槽位 TTL(秒),防止死锁
|
# CONCURRENCY_SLOT_TTL: 并发槽位 TTL(秒),防止死锁
|
||||||
# CACHE_RESERVATION_RATIO: 缓存用户预留比例(默认 30%)
|
# CACHE_RESERVATION_RATIO: 缓存用户预留比例(默认 10%,新用户可用 90%)
|
||||||
self.concurrency_slot_ttl = int(os.getenv("CONCURRENCY_SLOT_TTL", "600"))
|
self.concurrency_slot_ttl = int(os.getenv("CONCURRENCY_SLOT_TTL", "600"))
|
||||||
self.cache_reservation_ratio = float(os.getenv("CACHE_RESERVATION_RATIO", "0.3"))
|
self.cache_reservation_ratio = float(os.getenv("CACHE_RESERVATION_RATIO", "0.1"))
|
||||||
|
|
||||||
# HTTP 请求超时配置(秒)
|
# HTTP 请求超时配置(秒)
|
||||||
self.http_connect_timeout = float(os.getenv("HTTP_CONNECT_TIMEOUT", "10.0"))
|
self.http_connect_timeout = float(os.getenv("HTTP_CONNECT_TIMEOUT", "10.0"))
|
||||||
|
|||||||
@@ -9,7 +9,7 @@
|
|||||||
|
|
||||||
输出策略:
|
输出策略:
|
||||||
- 控制台: 开发环境=DEBUG, 生产环境=INFO (通过 LOG_LEVEL 控制)
|
- 控制台: 开发环境=DEBUG, 生产环境=INFO (通过 LOG_LEVEL 控制)
|
||||||
- 文件: 始终保存 DEBUG 级别,保留30天,每日轮转
|
- 文件: 始终保存 DEBUG 级别,保留30天,按大小轮转 (100MB)
|
||||||
|
|
||||||
使用方式:
|
使用方式:
|
||||||
from src.core.logger import logger
|
from src.core.logger import logger
|
||||||
@@ -72,12 +72,15 @@ def _log_filter(record: dict) -> bool: # type: ignore[type-arg]
|
|||||||
|
|
||||||
|
|
||||||
if IS_DOCKER:
|
if IS_DOCKER:
|
||||||
|
# 生产环境:禁用 backtrace 和 diagnose,减少日志噪音
|
||||||
logger.add(
|
logger.add(
|
||||||
sys.stdout,
|
sys.stdout,
|
||||||
format=CONSOLE_FORMAT_PROD,
|
format=CONSOLE_FORMAT_PROD,
|
||||||
level=LOG_LEVEL,
|
level=LOG_LEVEL,
|
||||||
filter=_log_filter, # type: ignore[arg-type]
|
filter=_log_filter, # type: ignore[arg-type]
|
||||||
colorize=False,
|
colorize=False,
|
||||||
|
backtrace=False,
|
||||||
|
diagnose=False,
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
logger.add(
|
logger.add(
|
||||||
@@ -92,30 +95,37 @@ if not DISABLE_FILE_LOG:
|
|||||||
log_dir = PROJECT_ROOT / "logs"
|
log_dir = PROJECT_ROOT / "logs"
|
||||||
log_dir.mkdir(exist_ok=True)
|
log_dir.mkdir(exist_ok=True)
|
||||||
|
|
||||||
|
# 文件日志通用配置
|
||||||
|
file_log_config = {
|
||||||
|
"format": FILE_FORMAT,
|
||||||
|
"filter": _log_filter,
|
||||||
|
"rotation": "100 MB",
|
||||||
|
"retention": "30 days",
|
||||||
|
"compression": "gz",
|
||||||
|
"enqueue": True,
|
||||||
|
"encoding": "utf-8",
|
||||||
|
"catch": True,
|
||||||
|
}
|
||||||
|
|
||||||
|
# 生产环境禁用详细堆栈
|
||||||
|
if IS_DOCKER:
|
||||||
|
file_log_config["backtrace"] = False
|
||||||
|
file_log_config["diagnose"] = False
|
||||||
|
|
||||||
# 主日志文件 - 所有级别
|
# 主日志文件 - 所有级别
|
||||||
logger.add(
|
logger.add(
|
||||||
log_dir / "app.log",
|
log_dir / "app.log",
|
||||||
format=FILE_FORMAT,
|
|
||||||
level="DEBUG",
|
level="DEBUG",
|
||||||
filter=_log_filter, # type: ignore[arg-type]
|
**file_log_config, # type: ignore[arg-type]
|
||||||
rotation="00:00",
|
|
||||||
retention="30 days",
|
|
||||||
compression="gz",
|
|
||||||
enqueue=True,
|
|
||||||
encoding="utf-8",
|
|
||||||
)
|
)
|
||||||
|
|
||||||
# 错误日志文件 - 仅 ERROR 及以上
|
# 错误日志文件 - 仅 ERROR 及以上
|
||||||
|
error_log_config = file_log_config.copy()
|
||||||
|
error_log_config["rotation"] = "50 MB"
|
||||||
logger.add(
|
logger.add(
|
||||||
log_dir / "error.log",
|
log_dir / "error.log",
|
||||||
format=FILE_FORMAT,
|
|
||||||
level="ERROR",
|
level="ERROR",
|
||||||
filter=_log_filter, # type: ignore[arg-type]
|
**error_log_config, # type: ignore[arg-type]
|
||||||
rotation="00:00",
|
|
||||||
retention="30 days",
|
|
||||||
compression="gz",
|
|
||||||
enqueue=True,
|
|
||||||
encoding="utf-8",
|
|
||||||
)
|
)
|
||||||
|
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
|
|||||||
@@ -107,20 +107,6 @@ class CreateProviderRequest(BaseModel):
|
|||||||
if not re.match(r"^https?://", v, re.IGNORECASE):
|
if not re.match(r"^https?://", v, re.IGNORECASE):
|
||||||
v = f"https://{v}"
|
v = f"https://{v}"
|
||||||
|
|
||||||
# 防止 SSRF 攻击:禁止内网地址
|
|
||||||
forbidden_patterns = [
|
|
||||||
r"localhost",
|
|
||||||
r"127\.0\.0\.1",
|
|
||||||
r"0\.0\.0\.0",
|
|
||||||
r"192\.168\.",
|
|
||||||
r"10\.",
|
|
||||||
r"172\.(1[6-9]|2[0-9]|3[0-1])\.",
|
|
||||||
r"169\.254\.",
|
|
||||||
]
|
|
||||||
for pattern in forbidden_patterns:
|
|
||||||
if re.search(pattern, v, re.IGNORECASE):
|
|
||||||
raise ValueError("不允许使用内网地址")
|
|
||||||
|
|
||||||
return v
|
return v
|
||||||
|
|
||||||
@field_validator("billing_type")
|
@field_validator("billing_type")
|
||||||
@@ -195,19 +181,6 @@ class CreateEndpointRequest(BaseModel):
|
|||||||
if not re.match(r"^https?://", v, re.IGNORECASE):
|
if not re.match(r"^https?://", v, re.IGNORECASE):
|
||||||
raise ValueError("URL 必须以 http:// 或 https:// 开头")
|
raise ValueError("URL 必须以 http:// 或 https:// 开头")
|
||||||
|
|
||||||
# 防止 SSRF
|
|
||||||
forbidden_patterns = [
|
|
||||||
r"localhost",
|
|
||||||
r"127\.0\.0\.1",
|
|
||||||
r"0\.0\.0\.0",
|
|
||||||
r"192\.168\.",
|
|
||||||
r"10\.",
|
|
||||||
r"172\.(1[6-9]|2[0-9]|3[0-1])\.",
|
|
||||||
]
|
|
||||||
for pattern in forbidden_patterns:
|
|
||||||
if re.search(pattern, v, re.IGNORECASE):
|
|
||||||
raise ValueError("不允许使用内网地址")
|
|
||||||
|
|
||||||
return v.rstrip("/") # 移除末尾斜杠
|
return v.rstrip("/") # 移除末尾斜杠
|
||||||
|
|
||||||
@field_validator("api_format")
|
@field_validator("api_format")
|
||||||
|
|||||||
@@ -45,24 +45,9 @@ class ProviderEndpointCreate(BaseModel):
|
|||||||
@field_validator("base_url")
|
@field_validator("base_url")
|
||||||
@classmethod
|
@classmethod
|
||||||
def validate_base_url(cls, v: str) -> str:
|
def validate_base_url(cls, v: str) -> str:
|
||||||
"""验证 API URL(SSRF 防护)"""
|
|
||||||
if not re.match(r"^https?://", v, re.IGNORECASE):
|
if not re.match(r"^https?://", v, re.IGNORECASE):
|
||||||
raise ValueError("URL 必须以 http:// 或 https:// 开头")
|
raise ValueError("URL 必须以 http:// 或 https:// 开头")
|
||||||
|
|
||||||
# 防止 SSRF 攻击:禁止内网地址
|
|
||||||
forbidden_patterns = [
|
|
||||||
r"localhost",
|
|
||||||
r"127\.0\.0\.1",
|
|
||||||
r"0\.0\.0\.0",
|
|
||||||
r"192\.168\.",
|
|
||||||
r"10\.",
|
|
||||||
r"172\.(1[6-9]|2[0-9]|3[0-1])\.",
|
|
||||||
r"169\.254\.",
|
|
||||||
]
|
|
||||||
for pattern in forbidden_patterns:
|
|
||||||
if re.search(pattern, v, re.IGNORECASE):
|
|
||||||
raise ValueError("不允许使用内网地址")
|
|
||||||
|
|
||||||
return v.rstrip("/") # 移除末尾斜杠
|
return v.rstrip("/") # 移除末尾斜杠
|
||||||
|
|
||||||
|
|
||||||
@@ -83,27 +68,13 @@ class ProviderEndpointUpdate(BaseModel):
|
|||||||
@field_validator("base_url")
|
@field_validator("base_url")
|
||||||
@classmethod
|
@classmethod
|
||||||
def validate_base_url(cls, v: Optional[str]) -> Optional[str]:
|
def validate_base_url(cls, v: Optional[str]) -> Optional[str]:
|
||||||
"""验证 API URL(SSRF 防护)"""
|
"""验证 API URL"""
|
||||||
if v is None:
|
if v is None:
|
||||||
return v
|
return v
|
||||||
|
|
||||||
if not re.match(r"^https?://", v, re.IGNORECASE):
|
if not re.match(r"^https?://", v, re.IGNORECASE):
|
||||||
raise ValueError("URL 必须以 http:// 或 https:// 开头")
|
raise ValueError("URL 必须以 http:// 或 https:// 开头")
|
||||||
|
|
||||||
# 防止 SSRF 攻击:禁止内网地址
|
|
||||||
forbidden_patterns = [
|
|
||||||
r"localhost",
|
|
||||||
r"127\.0\.0\.1",
|
|
||||||
r"0\.0\.0\.0",
|
|
||||||
r"192\.168\.",
|
|
||||||
r"10\.",
|
|
||||||
r"172\.(1[6-9]|2[0-9]|3[0-1])\.",
|
|
||||||
r"169\.254\.",
|
|
||||||
]
|
|
||||||
for pattern in forbidden_patterns:
|
|
||||||
if re.search(pattern, v, re.IGNORECASE):
|
|
||||||
raise ValueError("不允许使用内网地址")
|
|
||||||
|
|
||||||
return v.rstrip("/") # 移除末尾斜杠
|
return v.rstrip("/") # 移除末尾斜杠
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
4
src/services/cache/aware_scheduler.py
vendored
4
src/services/cache/aware_scheduler.py
vendored
@@ -59,7 +59,6 @@ from src.services.health.monitor import health_monitor
|
|||||||
from src.services.provider.format import normalize_api_format
|
from src.services.provider.format import normalize_api_format
|
||||||
from src.services.rate_limit.adaptive_reservation import (
|
from src.services.rate_limit.adaptive_reservation import (
|
||||||
AdaptiveReservationManager,
|
AdaptiveReservationManager,
|
||||||
ReservationResult,
|
|
||||||
get_adaptive_reservation_manager,
|
get_adaptive_reservation_manager,
|
||||||
)
|
)
|
||||||
from src.services.rate_limit.concurrency_manager import get_concurrency_manager
|
from src.services.rate_limit.concurrency_manager import get_concurrency_manager
|
||||||
@@ -112,8 +111,6 @@ class CacheAwareScheduler:
|
|||||||
- 健康度监控
|
- 健康度监控
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# 静态常量作为默认值(实际由 AdaptiveReservationManager 动态计算)
|
|
||||||
CACHE_RESERVATION_RATIO = 0.3
|
|
||||||
# 优先级模式常量
|
# 优先级模式常量
|
||||||
PRIORITY_MODE_PROVIDER = "provider" # 提供商优先模式
|
PRIORITY_MODE_PROVIDER = "provider" # 提供商优先模式
|
||||||
PRIORITY_MODE_GLOBAL_KEY = "global_key" # 全局 Key 优先模式
|
PRIORITY_MODE_GLOBAL_KEY = "global_key" # 全局 Key 优先模式
|
||||||
@@ -1320,7 +1317,6 @@ class CacheAwareScheduler:
|
|||||||
|
|
||||||
return {
|
return {
|
||||||
"scheduler": "cache_aware",
|
"scheduler": "cache_aware",
|
||||||
"cache_reservation_ratio": self.CACHE_RESERVATION_RATIO,
|
|
||||||
"dynamic_reservation": {
|
"dynamic_reservation": {
|
||||||
"enabled": True,
|
"enabled": True,
|
||||||
"config": reservation_stats["config"],
|
"config": reservation_stats["config"],
|
||||||
|
|||||||
@@ -69,24 +69,29 @@ class ErrorClassifier:
|
|||||||
# 这些错误是由用户请求本身导致的,换 Provider 也无济于事
|
# 这些错误是由用户请求本身导致的,换 Provider 也无济于事
|
||||||
# 注意:标准 API 返回的 error.type 已在 CLIENT_ERROR_TYPES 中处理
|
# 注意:标准 API 返回的 error.type 已在 CLIENT_ERROR_TYPES 中处理
|
||||||
# 这里主要用于匹配非标准格式或第三方代理的错误消息
|
# 这里主要用于匹配非标准格式或第三方代理的错误消息
|
||||||
|
#
|
||||||
|
# 重要:不要在此列表中包含 Provider Key 配置问题(如 invalid_api_key)
|
||||||
|
# 这类错误应该触发故障转移,而不是直接返回给用户
|
||||||
CLIENT_ERROR_PATTERNS: Tuple[str, ...] = (
|
CLIENT_ERROR_PATTERNS: Tuple[str, ...] = (
|
||||||
"could not process image", # 图片处理失败
|
"could not process image", # 图片处理失败
|
||||||
"image too large", # 图片过大
|
"image too large", # 图片过大
|
||||||
"invalid image", # 无效图片
|
"invalid image", # 无效图片
|
||||||
"unsupported image", # 不支持的图片格式
|
"unsupported image", # 不支持的图片格式
|
||||||
"content_policy_violation", # 内容违规
|
"content_policy_violation", # 内容违规
|
||||||
"invalid_api_key", # 无效的 API Key(不同于认证失败)
|
|
||||||
"context_length_exceeded", # 上下文长度超限
|
"context_length_exceeded", # 上下文长度超限
|
||||||
"content_length_limit", # 请求内容长度超限 (Claude API)
|
"content_length_limit", # 请求内容长度超限 (Claude API)
|
||||||
|
"content_length_exceeds", # 内容长度超限变体 (AWS CodeWhisperer)
|
||||||
"max_tokens", # token 数超限
|
"max_tokens", # token 数超限
|
||||||
"invalid_prompt", # 无效的提示词
|
"invalid_prompt", # 无效的提示词
|
||||||
"content too long", # 内容过长
|
"content too long", # 内容过长
|
||||||
|
"input is too long", # 输入过长 (AWS)
|
||||||
"message is too long", # 消息过长
|
"message is too long", # 消息过长
|
||||||
"prompt is too long", # Prompt 超长(第三方代理常见格式)
|
"prompt is too long", # Prompt 超长(第三方代理常见格式)
|
||||||
"image exceeds", # 图片超出限制
|
"image exceeds", # 图片超出限制
|
||||||
"pdf too large", # PDF 过大
|
"pdf too large", # PDF 过大
|
||||||
"file too large", # 文件过大
|
"file too large", # 文件过大
|
||||||
"tool_use_id", # tool_result 引用了不存在的 tool_use(兼容非标准代理)
|
"tool_use_id", # tool_result 引用了不存在的 tool_use(兼容非标准代理)
|
||||||
|
"validationexception", # AWS 验证异常
|
||||||
)
|
)
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
@@ -110,18 +115,124 @@ class ErrorClassifier:
|
|||||||
# 表示客户端错误的 error type(不区分大小写)
|
# 表示客户端错误的 error type(不区分大小写)
|
||||||
# 这些 type 表明是请求本身的问题,不应重试
|
# 这些 type 表明是请求本身的问题,不应重试
|
||||||
CLIENT_ERROR_TYPES: Tuple[str, ...] = (
|
CLIENT_ERROR_TYPES: Tuple[str, ...] = (
|
||||||
"invalid_request_error", # Claude/OpenAI 标准客户端错误类型
|
# Claude/OpenAI 标准
|
||||||
"invalid_argument", # Gemini 参数错误
|
"invalid_request_error",
|
||||||
"failed_precondition", # Gemini 前置条件错误
|
# Gemini
|
||||||
|
"invalid_argument",
|
||||||
|
"failed_precondition",
|
||||||
|
# AWS
|
||||||
|
"validationexception",
|
||||||
|
# 通用
|
||||||
|
"validation_error",
|
||||||
|
"bad_request",
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# 表示客户端错误的 reason/code 字段值
|
||||||
|
CLIENT_ERROR_REASONS: Tuple[str, ...] = (
|
||||||
|
"CONTENT_LENGTH_EXCEEDS_THRESHOLD",
|
||||||
|
"CONTEXT_LENGTH_EXCEEDED",
|
||||||
|
"MAX_TOKENS_EXCEEDED",
|
||||||
|
"INVALID_CONTENT",
|
||||||
|
"CONTENT_POLICY_VIOLATION",
|
||||||
|
)
|
||||||
|
|
||||||
|
def _parse_error_response(self, error_text: Optional[str]) -> Dict[str, Any]:
|
||||||
|
"""
|
||||||
|
解析错误响应为结构化数据
|
||||||
|
|
||||||
|
支持多种格式:
|
||||||
|
- {"error": {"type": "...", "message": "..."}} (Claude/OpenAI)
|
||||||
|
- {"error": {"message": "...", "__type": "..."}} (AWS)
|
||||||
|
- {"errorMessage": "..."} (Lambda)
|
||||||
|
- {"error": "..."}
|
||||||
|
- {"message": "...", "reason": "..."}
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
结构化的错误信息: {
|
||||||
|
"type": str, # 错误类型
|
||||||
|
"message": str, # 错误消息
|
||||||
|
"reason": str, # 错误原因/代码
|
||||||
|
"raw": str, # 原始文本
|
||||||
|
}
|
||||||
|
"""
|
||||||
|
result = {"type": "", "message": "", "reason": "", "raw": error_text or ""}
|
||||||
|
|
||||||
|
if not error_text:
|
||||||
|
return result
|
||||||
|
|
||||||
|
try:
|
||||||
|
data = json.loads(error_text)
|
||||||
|
|
||||||
|
# 格式 1: {"error": {"type": "...", "message": "..."}}
|
||||||
|
if isinstance(data.get("error"), dict):
|
||||||
|
error_obj = data["error"]
|
||||||
|
result["type"] = str(error_obj.get("type", ""))
|
||||||
|
result["message"] = str(error_obj.get("message", ""))
|
||||||
|
|
||||||
|
# AWS 格式: {"error": {"__type": "...", "message": "...", "reason": "..."}}
|
||||||
|
# __type 直接在 error 对象中,而不是嵌套在 message 里
|
||||||
|
if "__type" in error_obj:
|
||||||
|
result["type"] = result["type"] or str(error_obj.get("__type", ""))
|
||||||
|
if "reason" in error_obj:
|
||||||
|
result["reason"] = str(error_obj.get("reason", ""))
|
||||||
|
if "code" in error_obj:
|
||||||
|
result["reason"] = result["reason"] or str(error_obj.get("code", ""))
|
||||||
|
|
||||||
|
# 嵌套 JSON 格式: message 字段本身是 JSON 字符串
|
||||||
|
# 支持多种嵌套格式:
|
||||||
|
# - AWS: {"__type": "...", "message": "...", "reason": "..."}
|
||||||
|
# - 第三方代理: {"error": {"type": "...", "message": "..."}}
|
||||||
|
if result["message"].startswith("{"):
|
||||||
|
try:
|
||||||
|
nested = json.loads(result["message"])
|
||||||
|
if isinstance(nested, dict):
|
||||||
|
# AWS 格式
|
||||||
|
if "__type" in nested:
|
||||||
|
result["type"] = result["type"] or str(nested.get("__type", ""))
|
||||||
|
result["message"] = str(nested.get("message", result["message"]))
|
||||||
|
result["reason"] = str(nested.get("reason", ""))
|
||||||
|
# 第三方代理格式: {"error": {"message": "..."}}
|
||||||
|
elif isinstance(nested.get("error"), dict):
|
||||||
|
inner_error = nested["error"]
|
||||||
|
inner_msg = str(inner_error.get("message", ""))
|
||||||
|
if inner_msg:
|
||||||
|
result["message"] = inner_msg
|
||||||
|
# 简单格式: {"message": "..."}
|
||||||
|
elif "message" in nested:
|
||||||
|
result["message"] = str(nested["message"])
|
||||||
|
except json.JSONDecodeError:
|
||||||
|
pass
|
||||||
|
|
||||||
|
# 格式 2: {"error": "..."}
|
||||||
|
elif isinstance(data.get("error"), str):
|
||||||
|
result["message"] = str(data["error"])
|
||||||
|
|
||||||
|
# 格式 3: {"errorMessage": "..."} (Lambda)
|
||||||
|
elif "errorMessage" in data:
|
||||||
|
result["message"] = str(data["errorMessage"])
|
||||||
|
|
||||||
|
# 格式 4: {"message": "...", "reason": "..."}
|
||||||
|
elif "message" in data:
|
||||||
|
result["message"] = str(data["message"])
|
||||||
|
result["reason"] = str(data.get("reason", ""))
|
||||||
|
|
||||||
|
# 提取顶层的 reason/code
|
||||||
|
if not result["reason"]:
|
||||||
|
result["reason"] = str(data.get("reason", data.get("code", "")))
|
||||||
|
|
||||||
|
except (json.JSONDecodeError, TypeError, KeyError):
|
||||||
|
result["message"] = error_text[:500] if len(error_text) > 500 else error_text
|
||||||
|
|
||||||
|
return result
|
||||||
|
|
||||||
def _is_client_error(self, error_text: Optional[str]) -> bool:
|
def _is_client_error(self, error_text: Optional[str]) -> bool:
|
||||||
"""
|
"""
|
||||||
检测错误响应是否为客户端错误(不应重试)
|
检测错误响应是否为客户端错误(不应重试)
|
||||||
|
|
||||||
判断逻辑:
|
判断逻辑(按优先级):
|
||||||
1. 检查 error.type 是否为已知的客户端错误类型
|
1. 检查 error.type 是否为已知的客户端错误类型
|
||||||
2. 检查错误文本是否包含已知的客户端错误模式
|
2. 检查 reason/code 是否为已知的客户端错误原因
|
||||||
|
3. 回退到关键词匹配
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
error_text: 错误响应文本
|
error_text: 错误响应文本
|
||||||
@@ -132,67 +243,53 @@ class ErrorClassifier:
|
|||||||
if not error_text:
|
if not error_text:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
# 尝试解析 JSON 并检查 error type
|
parsed = self._parse_error_response(error_text)
|
||||||
try:
|
|
||||||
data = json.loads(error_text)
|
|
||||||
if isinstance(data.get("error"), dict):
|
|
||||||
error_type = data["error"].get("type", "")
|
|
||||||
if error_type and any(
|
|
||||||
t.lower() in error_type.lower() for t in self.CLIENT_ERROR_TYPES
|
|
||||||
):
|
|
||||||
return True
|
|
||||||
except (json.JSONDecodeError, TypeError, KeyError):
|
|
||||||
pass
|
|
||||||
|
|
||||||
# 回退到关键词匹配
|
# 1. 检查 error type
|
||||||
error_lower = error_text.lower()
|
if parsed["type"]:
|
||||||
return any(pattern.lower() in error_lower for pattern in self.CLIENT_ERROR_PATTERNS)
|
error_type_lower = parsed["type"].lower()
|
||||||
|
if any(t.lower() in error_type_lower for t in self.CLIENT_ERROR_TYPES):
|
||||||
|
return True
|
||||||
|
|
||||||
|
# 2. 检查 reason/code
|
||||||
|
if parsed["reason"]:
|
||||||
|
reason_upper = parsed["reason"].upper()
|
||||||
|
if any(r in reason_upper for r in self.CLIENT_ERROR_REASONS):
|
||||||
|
return True
|
||||||
|
|
||||||
|
# 3. 回退到关键词匹配(合并 message 和 raw)
|
||||||
|
search_text = f"{parsed['message']} {parsed['raw']}".lower()
|
||||||
|
return any(pattern.lower() in search_text for pattern in self.CLIENT_ERROR_PATTERNS)
|
||||||
|
|
||||||
def _extract_error_message(self, error_text: Optional[str]) -> Optional[str]:
|
def _extract_error_message(self, error_text: Optional[str]) -> Optional[str]:
|
||||||
"""
|
"""
|
||||||
从错误响应中提取错误消息
|
从错误响应中提取错误消息
|
||||||
|
|
||||||
支持格式:
|
|
||||||
- {"error": {"message": "..."}} (OpenAI/Claude)
|
|
||||||
- {"error": {"type": "...", "message": "..."}}
|
|
||||||
- {"error": "..."}
|
|
||||||
- {"message": "..."}
|
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
error_text: 错误响应文本
|
error_text: 错误响应文本
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
提取的错误消息,如果无法解析则返回原始文本
|
提取的错误消息
|
||||||
"""
|
"""
|
||||||
if not error_text:
|
if not error_text:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
try:
|
parsed = self._parse_error_response(error_text)
|
||||||
data = json.loads(error_text)
|
|
||||||
|
|
||||||
# {"error": {"message": "..."}} 或 {"error": {"type": "...", "message": "..."}}
|
# 构建可读的错误消息
|
||||||
if isinstance(data.get("error"), dict):
|
parts = []
|
||||||
error_obj = data["error"]
|
if parsed["type"]:
|
||||||
message = error_obj.get("message", "")
|
parts.append(parsed["type"])
|
||||||
error_type = error_obj.get("type", "")
|
if parsed["reason"]:
|
||||||
if message:
|
parts.append(f"[{parsed['reason']}]")
|
||||||
if error_type:
|
if parsed["message"]:
|
||||||
return f"{error_type}: {message}"
|
parts.append(parsed["message"])
|
||||||
return str(message)
|
|
||||||
|
|
||||||
# {"error": "..."}
|
if parts:
|
||||||
if isinstance(data.get("error"), str):
|
return ": ".join(parts) if len(parts) > 1 else parts[0]
|
||||||
return str(data["error"])
|
|
||||||
|
|
||||||
# {"message": "..."}
|
|
||||||
if isinstance(data.get("message"), str):
|
|
||||||
return str(data["message"])
|
|
||||||
|
|
||||||
except (json.JSONDecodeError, TypeError, KeyError):
|
|
||||||
pass
|
|
||||||
|
|
||||||
# 无法解析,返回原始文本(截断)
|
# 无法解析,返回原始文本(截断)
|
||||||
return error_text[:500] if len(error_text) > 500 else error_text
|
return parsed["raw"][:500] if len(parsed["raw"]) > 500 else parsed["raw"]
|
||||||
|
|
||||||
def classify(
|
def classify(
|
||||||
self,
|
self,
|
||||||
|
|||||||
@@ -5,6 +5,10 @@
|
|||||||
- 使用滑动窗口采样,容忍并发波动
|
- 使用滑动窗口采样,容忍并发波动
|
||||||
- 基于窗口内高利用率采样比例决策,而非要求连续高利用率
|
- 基于窗口内高利用率采样比例决策,而非要求连续高利用率
|
||||||
- 增加探测性扩容机制,长时间稳定时主动尝试扩容
|
- 增加探测性扩容机制,长时间稳定时主动尝试扩容
|
||||||
|
|
||||||
|
AIMD 参数说明:
|
||||||
|
- 扩容:加性增加 (+INCREASE_STEP)
|
||||||
|
- 缩容:乘性减少 (*DECREASE_MULTIPLIER,默认 0.85)
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from datetime import datetime, timezone
|
from datetime import datetime, timezone
|
||||||
@@ -34,7 +38,7 @@ class AdaptiveConcurrencyManager:
|
|||||||
核心算法:基于滑动窗口利用率的 AIMD
|
核心算法:基于滑动窗口利用率的 AIMD
|
||||||
- 滑动窗口记录最近 N 次请求的利用率
|
- 滑动窗口记录最近 N 次请求的利用率
|
||||||
- 当窗口内高利用率采样比例 >= 60% 时触发扩容
|
- 当窗口内高利用率采样比例 >= 60% 时触发扩容
|
||||||
- 遇到 429 错误时乘性减少 (*0.7)
|
- 遇到 429 错误时乘性减少 (*0.85)
|
||||||
- 长时间无 429 且有流量时触发探测性扩容
|
- 长时间无 429 且有流量时触发探测性扩容
|
||||||
|
|
||||||
扩容条件(满足任一即可):
|
扩容条件(满足任一即可):
|
||||||
|
|||||||
Reference in New Issue
Block a user