diff --git a/frontend/src/components/ui/dialog/Dialog.vue b/frontend/src/components/ui/dialog/Dialog.vue index cb8fc89..26cb66c 100644 --- a/frontend/src/components/ui/dialog/Dialog.vue +++ b/frontend/src/components/ui/dialog/Dialog.vue @@ -22,7 +22,7 @@ /> -
+
bool: # type: ignore[type-arg] if IS_DOCKER: + # 生产环境:禁用 backtrace 和 diagnose,减少日志噪音 logger.add( sys.stdout, format=CONSOLE_FORMAT_PROD, level=LOG_LEVEL, filter=_log_filter, # type: ignore[arg-type] colorize=False, + backtrace=False, + diagnose=False, ) else: logger.add( @@ -92,30 +95,37 @@ if not DISABLE_FILE_LOG: log_dir = PROJECT_ROOT / "logs" log_dir.mkdir(exist_ok=True) + # 文件日志通用配置 + file_log_config = { + "format": FILE_FORMAT, + "filter": _log_filter, + "rotation": "100 MB", + "retention": "30 days", + "compression": "gz", + "enqueue": True, + "encoding": "utf-8", + "catch": True, + } + + # 生产环境禁用详细堆栈 + if IS_DOCKER: + file_log_config["backtrace"] = False + file_log_config["diagnose"] = False + # 主日志文件 - 所有级别 logger.add( log_dir / "app.log", - format=FILE_FORMAT, level="DEBUG", - filter=_log_filter, # type: ignore[arg-type] - rotation="00:00", - retention="30 days", - compression="gz", - enqueue=True, - encoding="utf-8", + **file_log_config, # type: ignore[arg-type] ) # 错误日志文件 - 仅 ERROR 及以上 + error_log_config = file_log_config.copy() + error_log_config["rotation"] = "50 MB" logger.add( log_dir / "error.log", - format=FILE_FORMAT, level="ERROR", - filter=_log_filter, # type: ignore[arg-type] - rotation="00:00", - retention="30 days", - compression="gz", - enqueue=True, - encoding="utf-8", + **error_log_config, # type: ignore[arg-type] ) # ============================================================================ diff --git a/src/models/admin_requests.py b/src/models/admin_requests.py index 1ab87a8..c20e05f 100644 --- a/src/models/admin_requests.py +++ b/src/models/admin_requests.py @@ -107,20 +107,6 @@ class CreateProviderRequest(BaseModel): if not re.match(r"^https?://", v, re.IGNORECASE): v = f"https://{v}" - # 防止 SSRF 攻击:禁止内网地址 - forbidden_patterns = [ - r"localhost", - r"127\.0\.0\.1", - r"0\.0\.0\.0", - r"192\.168\.", - r"10\.", - r"172\.(1[6-9]|2[0-9]|3[0-1])\.", - r"169\.254\.", - ] - for pattern in forbidden_patterns: - if re.search(pattern, v, re.IGNORECASE): - raise ValueError("不允许使用内网地址") - return v @field_validator("billing_type") @@ -195,19 +181,6 @@ class CreateEndpointRequest(BaseModel): if not re.match(r"^https?://", v, re.IGNORECASE): raise ValueError("URL 必须以 http:// 或 https:// 开头") - # 防止 SSRF - forbidden_patterns = [ - r"localhost", - r"127\.0\.0\.1", - r"0\.0\.0\.0", - r"192\.168\.", - r"10\.", - r"172\.(1[6-9]|2[0-9]|3[0-1])\.", - ] - for pattern in forbidden_patterns: - if re.search(pattern, v, re.IGNORECASE): - raise ValueError("不允许使用内网地址") - return v.rstrip("/") # 移除末尾斜杠 @field_validator("api_format") diff --git a/src/models/endpoint_models.py b/src/models/endpoint_models.py index c9bd0f2..b61f091 100644 --- a/src/models/endpoint_models.py +++ b/src/models/endpoint_models.py @@ -45,24 +45,9 @@ class ProviderEndpointCreate(BaseModel): @field_validator("base_url") @classmethod def validate_base_url(cls, v: str) -> str: - """验证 API URL(SSRF 防护)""" if not re.match(r"^https?://", v, re.IGNORECASE): raise ValueError("URL 必须以 http:// 或 https:// 开头") - # 防止 SSRF 攻击:禁止内网地址 - forbidden_patterns = [ - r"localhost", - r"127\.0\.0\.1", - r"0\.0\.0\.0", - r"192\.168\.", - r"10\.", - r"172\.(1[6-9]|2[0-9]|3[0-1])\.", - r"169\.254\.", - ] - for pattern in forbidden_patterns: - if re.search(pattern, v, re.IGNORECASE): - raise ValueError("不允许使用内网地址") - return v.rstrip("/") # 移除末尾斜杠 @@ -83,27 +68,13 @@ class ProviderEndpointUpdate(BaseModel): @field_validator("base_url") @classmethod def validate_base_url(cls, v: Optional[str]) -> Optional[str]: - """验证 API URL(SSRF 防护)""" + """验证 API URL""" if v is None: return v if not re.match(r"^https?://", v, re.IGNORECASE): raise ValueError("URL 必须以 http:// 或 https:// 开头") - # 防止 SSRF 攻击:禁止内网地址 - forbidden_patterns = [ - r"localhost", - r"127\.0\.0\.1", - r"0\.0\.0\.0", - r"192\.168\.", - r"10\.", - r"172\.(1[6-9]|2[0-9]|3[0-1])\.", - r"169\.254\.", - ] - for pattern in forbidden_patterns: - if re.search(pattern, v, re.IGNORECASE): - raise ValueError("不允许使用内网地址") - return v.rstrip("/") # 移除末尾斜杠 diff --git a/src/services/cache/aware_scheduler.py b/src/services/cache/aware_scheduler.py index 9f712cd..4737629 100644 --- a/src/services/cache/aware_scheduler.py +++ b/src/services/cache/aware_scheduler.py @@ -59,7 +59,6 @@ from src.services.health.monitor import health_monitor from src.services.provider.format import normalize_api_format from src.services.rate_limit.adaptive_reservation import ( AdaptiveReservationManager, - ReservationResult, get_adaptive_reservation_manager, ) from src.services.rate_limit.concurrency_manager import get_concurrency_manager @@ -112,8 +111,6 @@ class CacheAwareScheduler: - 健康度监控 """ - # 静态常量作为默认值(实际由 AdaptiveReservationManager 动态计算) - CACHE_RESERVATION_RATIO = 0.3 # 优先级模式常量 PRIORITY_MODE_PROVIDER = "provider" # 提供商优先模式 PRIORITY_MODE_GLOBAL_KEY = "global_key" # 全局 Key 优先模式 @@ -1320,7 +1317,6 @@ class CacheAwareScheduler: return { "scheduler": "cache_aware", - "cache_reservation_ratio": self.CACHE_RESERVATION_RATIO, "dynamic_reservation": { "enabled": True, "config": reservation_stats["config"], diff --git a/src/services/orchestration/error_classifier.py b/src/services/orchestration/error_classifier.py index 30b71e9..b9fe5bb 100644 --- a/src/services/orchestration/error_classifier.py +++ b/src/services/orchestration/error_classifier.py @@ -69,24 +69,29 @@ class ErrorClassifier: # 这些错误是由用户请求本身导致的,换 Provider 也无济于事 # 注意:标准 API 返回的 error.type 已在 CLIENT_ERROR_TYPES 中处理 # 这里主要用于匹配非标准格式或第三方代理的错误消息 + # + # 重要:不要在此列表中包含 Provider Key 配置问题(如 invalid_api_key) + # 这类错误应该触发故障转移,而不是直接返回给用户 CLIENT_ERROR_PATTERNS: Tuple[str, ...] = ( "could not process image", # 图片处理失败 "image too large", # 图片过大 "invalid image", # 无效图片 "unsupported image", # 不支持的图片格式 "content_policy_violation", # 内容违规 - "invalid_api_key", # 无效的 API Key(不同于认证失败) "context_length_exceeded", # 上下文长度超限 "content_length_limit", # 请求内容长度超限 (Claude API) + "content_length_exceeds", # 内容长度超限变体 (AWS CodeWhisperer) "max_tokens", # token 数超限 "invalid_prompt", # 无效的提示词 "content too long", # 内容过长 + "input is too long", # 输入过长 (AWS) "message is too long", # 消息过长 "prompt is too long", # Prompt 超长(第三方代理常见格式) "image exceeds", # 图片超出限制 "pdf too large", # PDF 过大 "file too large", # 文件过大 "tool_use_id", # tool_result 引用了不存在的 tool_use(兼容非标准代理) + "validationexception", # AWS 验证异常 ) def __init__( @@ -110,18 +115,124 @@ class ErrorClassifier: # 表示客户端错误的 error type(不区分大小写) # 这些 type 表明是请求本身的问题,不应重试 CLIENT_ERROR_TYPES: Tuple[str, ...] = ( - "invalid_request_error", # Claude/OpenAI 标准客户端错误类型 - "invalid_argument", # Gemini 参数错误 - "failed_precondition", # Gemini 前置条件错误 + # Claude/OpenAI 标准 + "invalid_request_error", + # Gemini + "invalid_argument", + "failed_precondition", + # AWS + "validationexception", + # 通用 + "validation_error", + "bad_request", ) + # 表示客户端错误的 reason/code 字段值 + CLIENT_ERROR_REASONS: Tuple[str, ...] = ( + "CONTENT_LENGTH_EXCEEDS_THRESHOLD", + "CONTEXT_LENGTH_EXCEEDED", + "MAX_TOKENS_EXCEEDED", + "INVALID_CONTENT", + "CONTENT_POLICY_VIOLATION", + ) + + def _parse_error_response(self, error_text: Optional[str]) -> Dict[str, Any]: + """ + 解析错误响应为结构化数据 + + 支持多种格式: + - {"error": {"type": "...", "message": "..."}} (Claude/OpenAI) + - {"error": {"message": "...", "__type": "..."}} (AWS) + - {"errorMessage": "..."} (Lambda) + - {"error": "..."} + - {"message": "...", "reason": "..."} + + Returns: + 结构化的错误信息: { + "type": str, # 错误类型 + "message": str, # 错误消息 + "reason": str, # 错误原因/代码 + "raw": str, # 原始文本 + } + """ + result = {"type": "", "message": "", "reason": "", "raw": error_text or ""} + + if not error_text: + return result + + try: + data = json.loads(error_text) + + # 格式 1: {"error": {"type": "...", "message": "..."}} + if isinstance(data.get("error"), dict): + error_obj = data["error"] + result["type"] = str(error_obj.get("type", "")) + result["message"] = str(error_obj.get("message", "")) + + # AWS 格式: {"error": {"__type": "...", "message": "...", "reason": "..."}} + # __type 直接在 error 对象中,而不是嵌套在 message 里 + if "__type" in error_obj: + result["type"] = result["type"] or str(error_obj.get("__type", "")) + if "reason" in error_obj: + result["reason"] = str(error_obj.get("reason", "")) + if "code" in error_obj: + result["reason"] = result["reason"] or str(error_obj.get("code", "")) + + # 嵌套 JSON 格式: message 字段本身是 JSON 字符串 + # 支持多种嵌套格式: + # - AWS: {"__type": "...", "message": "...", "reason": "..."} + # - 第三方代理: {"error": {"type": "...", "message": "..."}} + if result["message"].startswith("{"): + try: + nested = json.loads(result["message"]) + if isinstance(nested, dict): + # AWS 格式 + if "__type" in nested: + result["type"] = result["type"] or str(nested.get("__type", "")) + result["message"] = str(nested.get("message", result["message"])) + result["reason"] = str(nested.get("reason", "")) + # 第三方代理格式: {"error": {"message": "..."}} + elif isinstance(nested.get("error"), dict): + inner_error = nested["error"] + inner_msg = str(inner_error.get("message", "")) + if inner_msg: + result["message"] = inner_msg + # 简单格式: {"message": "..."} + elif "message" in nested: + result["message"] = str(nested["message"]) + except json.JSONDecodeError: + pass + + # 格式 2: {"error": "..."} + elif isinstance(data.get("error"), str): + result["message"] = str(data["error"]) + + # 格式 3: {"errorMessage": "..."} (Lambda) + elif "errorMessage" in data: + result["message"] = str(data["errorMessage"]) + + # 格式 4: {"message": "...", "reason": "..."} + elif "message" in data: + result["message"] = str(data["message"]) + result["reason"] = str(data.get("reason", "")) + + # 提取顶层的 reason/code + if not result["reason"]: + result["reason"] = str(data.get("reason", data.get("code", ""))) + + except (json.JSONDecodeError, TypeError, KeyError): + result["message"] = error_text[:500] if len(error_text) > 500 else error_text + + return result + def _is_client_error(self, error_text: Optional[str]) -> bool: """ 检测错误响应是否为客户端错误(不应重试) - 判断逻辑: + 判断逻辑(按优先级): 1. 检查 error.type 是否为已知的客户端错误类型 - 2. 检查错误文本是否包含已知的客户端错误模式 + 2. 检查 reason/code 是否为已知的客户端错误原因 + 3. 回退到关键词匹配 Args: error_text: 错误响应文本 @@ -132,67 +243,53 @@ class ErrorClassifier: if not error_text: return False - # 尝试解析 JSON 并检查 error type - try: - data = json.loads(error_text) - if isinstance(data.get("error"), dict): - error_type = data["error"].get("type", "") - if error_type and any( - t.lower() in error_type.lower() for t in self.CLIENT_ERROR_TYPES - ): - return True - except (json.JSONDecodeError, TypeError, KeyError): - pass + parsed = self._parse_error_response(error_text) - # 回退到关键词匹配 - error_lower = error_text.lower() - return any(pattern.lower() in error_lower for pattern in self.CLIENT_ERROR_PATTERNS) + # 1. 检查 error type + if parsed["type"]: + error_type_lower = parsed["type"].lower() + if any(t.lower() in error_type_lower for t in self.CLIENT_ERROR_TYPES): + return True + + # 2. 检查 reason/code + if parsed["reason"]: + reason_upper = parsed["reason"].upper() + if any(r in reason_upper for r in self.CLIENT_ERROR_REASONS): + return True + + # 3. 回退到关键词匹配(合并 message 和 raw) + search_text = f"{parsed['message']} {parsed['raw']}".lower() + return any(pattern.lower() in search_text for pattern in self.CLIENT_ERROR_PATTERNS) def _extract_error_message(self, error_text: Optional[str]) -> Optional[str]: """ 从错误响应中提取错误消息 - 支持格式: - - {"error": {"message": "..."}} (OpenAI/Claude) - - {"error": {"type": "...", "message": "..."}} - - {"error": "..."} - - {"message": "..."} - Args: error_text: 错误响应文本 Returns: - 提取的错误消息,如果无法解析则返回原始文本 + 提取的错误消息 """ if not error_text: return None - try: - data = json.loads(error_text) + parsed = self._parse_error_response(error_text) - # {"error": {"message": "..."}} 或 {"error": {"type": "...", "message": "..."}} - if isinstance(data.get("error"), dict): - error_obj = data["error"] - message = error_obj.get("message", "") - error_type = error_obj.get("type", "") - if message: - if error_type: - return f"{error_type}: {message}" - return str(message) + # 构建可读的错误消息 + parts = [] + if parsed["type"]: + parts.append(parsed["type"]) + if parsed["reason"]: + parts.append(f"[{parsed['reason']}]") + if parsed["message"]: + parts.append(parsed["message"]) - # {"error": "..."} - if isinstance(data.get("error"), str): - return str(data["error"]) - - # {"message": "..."} - if isinstance(data.get("message"), str): - return str(data["message"]) - - except (json.JSONDecodeError, TypeError, KeyError): - pass + if parts: + return ": ".join(parts) if len(parts) > 1 else parts[0] # 无法解析,返回原始文本(截断) - return error_text[:500] if len(error_text) > 500 else error_text + return parsed["raw"][:500] if len(parsed["raw"]) > 500 else parsed["raw"] def classify( self, diff --git a/src/services/rate_limit/adaptive_concurrency.py b/src/services/rate_limit/adaptive_concurrency.py index 8190156..1bd5d21 100644 --- a/src/services/rate_limit/adaptive_concurrency.py +++ b/src/services/rate_limit/adaptive_concurrency.py @@ -5,6 +5,10 @@ - 使用滑动窗口采样,容忍并发波动 - 基于窗口内高利用率采样比例决策,而非要求连续高利用率 - 增加探测性扩容机制,长时间稳定时主动尝试扩容 + +AIMD 参数说明: +- 扩容:加性增加 (+INCREASE_STEP) +- 缩容:乘性减少 (*DECREASE_MULTIPLIER,默认 0.85) """ from datetime import datetime, timezone @@ -34,7 +38,7 @@ class AdaptiveConcurrencyManager: 核心算法:基于滑动窗口利用率的 AIMD - 滑动窗口记录最近 N 次请求的利用率 - 当窗口内高利用率采样比例 >= 60% 时触发扩容 - - 遇到 429 错误时乘性减少 (*0.7) + - 遇到 429 错误时乘性减少 (*0.85) - 长时间无 429 且有流量时触发探测性扩容 扩容条件(满足任一即可):