mirror of
https://github.com/fawney19/Aether.git
synced 2026-01-08 18:52:28 +08:00
341 lines
13 KiB
Python
341 lines
13 KiB
Python
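"""
Adaptive reservation ratio manager.

Dynamically computes the share of capacity reserved for cached users from the
learning confidence and the current load, instead of relying on a fixed 30%
reservation that fits poorly during early learning and under changing load.

Core ideas:
1. Probe phase: use a low reservation so the system can quickly learn the real
   concurrency limit.
2. Stable phase: adjust the reservation ratio from confidence and load.
3. Confidence: combines the success rate, the time since the last 429, and the
   stability of the recent adjustment history.
"""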
|
||
|
||
import statistics
|
||
from dataclasses import dataclass, field
|
||
from datetime import datetime, timezone
|
||
from typing import TYPE_CHECKING, Any, Dict, Optional
|
||
|
||
from src.core.logger import logger
|
||
|
||
from src.config.constants import AdaptiveReservationDefaults
|
||
|
||
if TYPE_CHECKING:
|
||
from src.models.database import ProviderAPIKey
|
||
|
||
|
||
def _defaults_field(constant_name: str):
    """Return a dataclass field whose default is read from AdaptiveReservationDefaults.

    The lookup happens inside the default factory, i.e. each time an instance
    is created without an explicit value for the field.
    """
    return field(default_factory=lambda: getattr(AdaptiveReservationDefaults, constant_name))


@dataclass
class ReservationConfig:
    """Reservation ratio settings; defaults come from the shared constants class."""

    # Probe phase
    probe_phase_requests: int = _defaults_field("PROBE_PHASE_REQUESTS")
    probe_reservation: float = _defaults_field("PROBE_RESERVATION")

    # Stable phase
    stable_min_reservation: float = _defaults_field("STABLE_MIN_RESERVATION")
    stable_max_reservation: float = _defaults_field("STABLE_MAX_RESERVATION")

    # Confidence calculation parameters
    success_count_for_full_confidence: int = _defaults_field(
        "SUCCESS_COUNT_FOR_FULL_CONFIDENCE"
    )
    cooldown_hours_for_full_confidence: int = _defaults_field(
        "COOLDOWN_HOURS_FOR_FULL_CONFIDENCE"
    )

    # Load thresholds
    low_load_threshold: float = _defaults_field("LOW_LOAD_THRESHOLD")
    high_load_threshold: float = _defaults_field("HIGH_LOAD_THRESHOLD")
|
||
|
||
|
||
@dataclass
class ReservationResult:
    """Outcome of a reservation ratio calculation."""

    ratio: float  # final reservation ratio
    phase: str  # current phase: "probe" | "stable"
    confidence: float  # confidence (0-1)
    load_factor: float  # load factor (0-1)
    details: Dict[str, Any]  # diagnostic details (e.g. request totals, reason text)
|
||
|
||
|
||
class AdaptiveReservationManager:
    """
    Adaptive manager for the cache-user reservation ratio.

    How it works:
    1. Probe phase (total requests < ``config.probe_phase_requests``):
       - returns the fixed ``config.probe_reservation`` ratio so the system
         can learn the real concurrency limit without wasting capacity.

    2. Stable phase (total requests >= threshold):
       - derives the ratio from confidence and load;
       - high confidence + high load -> high reservation (protect cached users);
       - low confidence or low load -> low reservation (avoid waste).

    Confidence factors:
    - success rate: share of successful requests, scaled down while the
      request count is still below ``config.success_count_for_full_confidence``;
    - 429 cooldown: the longer since the last 429, the higher the score;
    - adjustment-history stability: the smaller the variance of the most
      recent limits, the higher the score.
    """

    # Weights of the three confidence factors (they add up to 1.0).
    _SUCCESS_WEIGHT = 0.4
    _COOLDOWN_WEIGHT = 0.3
    _STABILITY_WEIGHT = 0.3
    # Stability score used when there is not enough history to judge (half weight).
    _NEUTRAL_STABILITY_SCORE = 0.15
    # Variance of the recent limits at which the stability score drops to zero.
    _VARIANCE_FOR_ZERO_STABILITY = 10

    def __init__(self, config: Optional["ReservationConfig"] = None):
        """Create a manager; a default ReservationConfig is built when none is given."""
        self.config = config or ReservationConfig()
        # Simple in-memory cache slot; no method of this class reads or writes it.
        self._cache: Dict[str, "ReservationResult"] = {}

    def calculate_reservation(
        self,
        key: "ProviderAPIKey",
        current_concurrent: int = 0,
        effective_limit: Optional[int] = None,
    ) -> "ReservationResult":
        """
        Compute the reservation ratio that should currently be used.

        Args:
            key: ProviderAPIKey object whose counters and history are inspected.
            current_concurrent: current number of concurrent requests.
            effective_limit: effective concurrency limit (learned or configured).

        Returns:
            ReservationResult with the ratio, phase, confidence, load factor
            and a ``details`` dict explaining the decision.
        """
        # Total request count decides which phase the key is in.
        total_requests = self._get_total_requests(key)
        load_ratio = self._calculate_load_ratio(current_concurrent, effective_limit)

        # Phase 1: probe
        if total_requests < self.config.probe_phase_requests:
            return ReservationResult(
                ratio=self.config.probe_reservation,
                phase="probe",
                confidence=0.0,
                load_factor=load_ratio,
                details={
                    "total_requests": total_requests,
                    "probe_threshold": self.config.probe_phase_requests,
                    "reason": "探测阶段,使用低预留让系统学习真实限制",
                },
            )

        # Phase 2: stable
        confidence = self._calculate_confidence(key)
        ratio = self._calculate_stable_ratio(confidence, load_ratio)

        return ReservationResult(
            ratio=ratio,
            phase="stable",
            confidence=confidence,
            load_factor=load_ratio,
            details={
                "total_requests": total_requests,
                "confidence_factors": self._get_confidence_breakdown(key),
                "reason": self._get_ratio_reason(confidence, load_ratio),
            },
        )

    def _get_total_requests(self, key: "ProviderAPIKey") -> int:
        """Return the total request count used to decide whether probing is over."""
        request_count = key.request_count or 0
        if request_count != 0:
            return request_count

        # No request counter yet: approximate it from the 429 counters, the
        # success counter and the adjustment history (10 per history entry).
        concurrent_429 = key.concurrent_429_count or 0
        rpm_429 = key.rpm_429_count or 0
        success_count = key.success_count or 0
        history_count = len(key.adjustment_history or []) * 10
        return concurrent_429 + rpm_429 + success_count + history_count

    def _calculate_load_ratio(
        self, current_concurrent: int, effective_limit: Optional[int]
    ) -> float:
        """Return current load as a fraction of the limit, clamped to [0, 1].

        A missing or non-positive limit yields 0.0.
        """
        if not effective_limit or effective_limit <= 0:
            return 0.0
        # Clamp below as well so a negative concurrency counter cannot
        # produce a negative load factor.
        return min(max(current_concurrent / effective_limit, 0.0), 1.0)

    def _calculate_confidence(self, key: "ProviderAPIKey") -> float:
        """
        Compute the confidence (0-1) in the learned limit.

        Sum of three weighted factors, capped at 1.0:
        - success rate: 40%
        - 429 cooldown time: 30%
        - adjustment-history stability: 30%
        """
        scores = self._get_confidence_breakdown(key)
        return min(
            scores["success_score"] + scores["cooldown_score"] + scores["stability_score"], 1.0
        )

    def _get_confidence_breakdown(self, key: "ProviderAPIKey") -> Dict[str, Any]:
        """Return the individual confidence scores plus the raw figures behind them.

        ``success_rate`` is a percentage (None when there are no requests) and
        ``hours_since_429`` is None when the key has no recorded 429.
        """
        request_count = key.request_count or 0
        success_count = key.success_count or 0
        history = key.adjustment_history or []

        # Computed once so the reported value and the cooldown score always
        # come from the same timestamp normalisation.
        hours_since_429 = self._hours_since_last_429(key)

        success_rate_pct = (success_count / request_count * 100) if request_count > 0 else None

        return {
            "success_score": round(self._success_score(request_count, success_count), 3),
            "cooldown_score": round(self._cooldown_score(hours_since_429), 3),
            "stability_score": round(self._stability_score(history), 3),
            "request_count": request_count,
            "success_count": success_count,
            "success_rate": round(success_rate_pct, 1) if success_rate_pct is not None else None,
            "hours_since_429": round(hours_since_429, 1) if hours_since_429 is not None else None,
            "history_count": len(history),
        }

    def _success_score(self, request_count: int, success_count: int) -> float:
        """Score the success rate (weight 40%), scaled down while data is scarce."""
        if request_count <= 0:
            return 0.0

        # Clamp to [0, 1] so drifting counters (success_count > request_count
        # or a negative value) cannot push the score outside its weight.
        success_rate = min(max(success_count / request_count, 0.0), 1.0)

        full_confidence_count = self.config.success_count_for_full_confidence
        if request_count >= full_confidence_count:
            return success_rate * self._SUCCESS_WEIGHT

        # Not enough requests yet: scale by progress towards the threshold.
        progress_ratio = request_count / full_confidence_count
        return success_rate * progress_ratio * self._SUCCESS_WEIGHT

    @staticmethod
    def _hours_since_last_429(key: "ProviderAPIKey") -> Optional[float]:
        """Return hours elapsed since the key's last 429, or None if it never had one.

        Naive timestamps are interpreted as UTC; aware timestamps keep their
        own offset. A timestamp in the future counts as 0 hours.
        """
        last_429 = key.last_429_at
        if not last_429:
            return None
        if last_429.tzinfo is None:
            last_429 = last_429.replace(tzinfo=timezone.utc)
        elapsed_hours = (datetime.now(timezone.utc) - last_429).total_seconds() / 3600
        return max(elapsed_hours, 0.0)

    def _cooldown_score(self, hours_since_429: Optional[float]) -> float:
        """Score the time since the last 429 (weight 30%)."""
        if hours_since_429 is None:
            # Never hit a 429: full score.
            return self._COOLDOWN_WEIGHT

        full_confidence_hours = self.config.cooldown_hours_for_full_confidence
        if full_confidence_hours <= 0:
            # No cooldown window configured: any elapsed time is enough.
            return self._COOLDOWN_WEIGHT

        cooldown_ratio = min(hours_since_429 / full_confidence_hours, 1.0)
        return cooldown_ratio * self._COOLDOWN_WEIGHT

    def _stability_score(self, history: Any) -> float:
        """Score the stability of the most recent limit adjustments (weight 30%)."""
        if len(history) < 3:
            # Not enough history: half score.
            return self._NEUTRAL_STABILITY_SCORE

        # Only the five most recent records count; entries without a truthy
        # "new_limit" are ignored.
        recent_limits = [
            limit for limit in (record.get("new_limit") for record in history[-5:]) if limit
        ]
        if len(recent_limits) < 2:
            return self._NEUTRAL_STABILITY_SCORE

        # statistics.variance only raises StatisticsError for fewer than two
        # data points, which the check above rules out.
        variance = statistics.variance(recent_limits)
        stability_ratio = max(0, 1 - variance / self._VARIANCE_FOR_ZERO_STABILITY)
        return stability_ratio * self._STABILITY_WEIGHT

    def _calculate_stable_ratio(self, confidence: float, load_ratio: float) -> float:
        """
        Compute the reservation ratio for the stable phase.

        Strategy:
        - low load (< ``config.low_load_threshold``): minimum reservation;
        - medium load (between the two thresholds): interpolate between the
          minimum and maximum by confidence and by position in the band;
        - high load (>= ``config.high_load_threshold``): interpolate between
          the minimum and maximum by confidence only.
        """
        min_r = self.config.stable_min_reservation
        max_r = self.config.stable_max_reservation

        if load_ratio < self.config.low_load_threshold:
            return min_r

        if load_ratio < self.config.high_load_threshold:
            # The higher the load and the confidence, the larger the reservation.
            load_factor = (load_ratio - self.config.low_load_threshold) / (
                self.config.high_load_threshold - self.config.low_load_threshold
            )
            return min_r + confidence * load_factor * (max_r - min_r)

        # High load: low confidence stays conservative so an inaccurate
        # learned limit does not cause over-reservation.
        return min_r + confidence * (max_r - min_r)

    def _get_ratio_reason(self, confidence: float, load_ratio: float) -> str:
        """Build the human-readable explanation for the chosen ratio."""
        if load_ratio < self.config.low_load_threshold:
            return f"低负载({load_ratio:.0%}),使用最小预留"

        if confidence < 0.3:
            return f"置信度低({confidence:.0%}),保守预留避免浪费"

        # ">=" matches _calculate_stable_ratio, which treats a load equal to
        # the high threshold as high load.
        if confidence > 0.7 and load_ratio >= self.config.high_load_threshold:
            return f"高置信度({confidence:.0%})+高负载({load_ratio:.0%}),使用较高预留保护缓存用户"

        return f"置信度{confidence:.0%},负载{load_ratio:.0%},动态计算预留"

    def get_stats(self) -> Dict[str, Any]:
        """Return the manager's statistics (currently the active configuration)."""
        return {
            "config": {
                "probe_phase_requests": self.config.probe_phase_requests,
                "probe_reservation": self.config.probe_reservation,
                "stable_min_reservation": self.config.stable_min_reservation,
                "stable_max_reservation": self.config.stable_max_reservation,
                "low_load_threshold": self.config.low_load_threshold,
                "high_load_threshold": self.config.high_load_threshold,
            },
        }
|
||
|
||
|
||
# Global singleton instance; None until get_adaptive_reservation_manager() creates it.
_reservation_manager: Optional[AdaptiveReservationManager] = None
|
||
|
||
|
||
def get_adaptive_reservation_manager() -> AdaptiveReservationManager:
    """Return the process-wide AdaptiveReservationManager, creating it on first use."""
    global _reservation_manager
    existing = _reservation_manager
    if existing is not None:
        return existing
    # First call (or first call after a reset): build and remember the instance.
    _reservation_manager = AdaptiveReservationManager()
    return _reservation_manager
|
||
|
||
|
||
def reset_adaptive_reservation_manager() -> None:
    """Reset the global singleton (intended for tests)."""
    # Dropping the reference makes the next getter call build a fresh manager.
    global _reservation_manager
    _reservation_manager = None
|