From 1521ce5a96f98509db157cdc8291a28d632bc07a Mon Sep 17 00:00:00 2001
From: fawney19
Date: Thu, 8 Jan 2026 03:20:04 +0800
Subject: [PATCH] feat: add load-balance scheduling mode
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Add a load_balance scheduling mode that rotates candidates randomly within the same priority level
- The frontend can now switch between three scheduling modes: cache affinity, load balance, and fixed order
---
 .../components/PriorityManagementDialog.vue | 47 ++++++++-----
 src/services/cache/aware_scheduler.py       | 67 +++++++++++++++++--
 2 files changed, 95 insertions(+), 19 deletions(-)

diff --git a/frontend/src/features/providers/components/PriorityManagementDialog.vue b/frontend/src/features/providers/components/PriorityManagementDialog.vue
index 153aea2..7265279 100644
--- a/frontend/src/features/providers/components/PriorityManagementDialog.vue
+++ b/frontend/src/features/providers/components/PriorityManagementDialog.vue
@@ -319,19 +319,6 @@
 [template hunk body lost to markup stripping; only the "调度:" (scheduling) label survived. The hunk replaces the two-mode scheduling toggle with a three-mode selector: cache affinity, load balance, fixed order.]
@@ -445,7 +458,7 @@ const saving = ref(false)
 const editingKeyPriority = ref<Record<string, string>>({}) // format -> keyId
 
 // Scheduling mode state
-const schedulingMode = ref<'fixed_order' | 'cache_affinity'>('cache_affinity')
+const schedulingMode = ref<'fixed_order' | 'load_balance' | 'cache_affinity'>('cache_affinity')
 
 // Available API formats
 const availableFormats = computed(() => {
@@ -478,7 +491,11 @@ async function loadCurrentPriorityMode() {
     activeMainTab.value = currentMode === 'global_key' ? 'key' : 'provider'
 
     const currentSchedulingMode = schedulingResponse.value || 'cache_affinity'
-    schedulingMode.value = currentSchedulingMode === 'fixed_order' ? 'fixed_order' : 'cache_affinity'
+    if (currentSchedulingMode === 'fixed_order' || currentSchedulingMode === 'load_balance' || currentSchedulingMode === 'cache_affinity') {
+      schedulingMode.value = currentSchedulingMode
+    } else {
+      schedulingMode.value = 'cache_affinity'
+    }
   } catch {
     activeMainTab.value = 'provider'
     schedulingMode.value = 'cache_affinity'
diff --git a/src/services/cache/aware_scheduler.py b/src/services/cache/aware_scheduler.py
index 3257240..c38971f 100644
--- a/src/services/cache/aware_scheduler.py
+++ b/src/services/cache/aware_scheduler.py
@@ -121,11 +121,13 @@ class CacheAwareScheduler:
         PRIORITY_MODE_GLOBAL_KEY,
     }
     # Scheduling mode constants
-    SCHEDULING_MODE_FIXED_ORDER = "fixed_order"  # Fixed-order mode
-    SCHEDULING_MODE_CACHE_AFFINITY = "cache_affinity"  # Cache-affinity mode
+    SCHEDULING_MODE_FIXED_ORDER = "fixed_order"  # Fixed order: strict priority order, ignores cache
+    SCHEDULING_MODE_CACHE_AFFINITY = "cache_affinity"  # Cache affinity: prefer cached, hash-spread within a priority
+    SCHEDULING_MODE_LOAD_BALANCE = "load_balance"  # Load balance: ignore cache, random rotation within a priority
     ALLOWED_SCHEDULING_MODES = {
         SCHEDULING_MODE_FIXED_ORDER,
         SCHEDULING_MODE_CACHE_AFFINITY,
+        SCHEDULING_MODE_LOAD_BALANCE,
     }
 
     def __init__(
@@ -680,8 +682,9 @@ class CacheAwareScheduler:
                 f"(api_format={target_format.value}, model={model_name})"
             )
 
-        # 4. Apply cache-affinity ordering (only enabled in cache-affinity mode)
+        # 4. Apply the ordering strategy for the configured scheduling mode
         if self.scheduling_mode == self.SCHEDULING_MODE_CACHE_AFFINITY:
+            # Cache-affinity mode: prefer cached candidates, hash-spread within the same priority
             if affinity_key and candidates:
                 candidates = await self._apply_cache_affinity(
                     candidates=candidates,
@@ -689,8 +692,13 @@ class CacheAwareScheduler:
                     api_format=target_format,
                     global_model_id=global_model_id,
                 )
+        elif self.scheduling_mode == self.SCHEDULING_MODE_LOAD_BALANCE:
+            # Load-balance mode: ignore cache, rotate randomly within the same priority
+            candidates = self._apply_load_balance(candidates)
+            for candidate in candidates:
+                candidate.is_cached = False
         else:
-            # Fixed-order mode: mark all candidates as non-cached
+            # Fixed-order mode: strict priority order, ignore cache
            for candidate in candidates:
                 candidate.is_cached = False
 
@@ -1163,6 +1171,57 @@ class CacheAwareScheduler:
 
         return result
 
+    def _apply_load_balance(
+        self, candidates: List[ProviderCandidate]
+    ) -> List[ProviderCandidate]:
+        """
+        Load-balance mode: random rotation within the same priority level.
+
+        Ordering logic:
+        1. Group by priority (provider_priority + internal_priority, or global_priority)
+        2. Shuffle each same-priority group randomly
+        3. Ignore cache affinity entirely
+        """
+        if not candidates:
+            return candidates
+
+        from collections import defaultdict
+
+        # Use tuples as the common key type so both priority modes share one grouping dict
+        priority_groups: Dict[tuple, List[ProviderCandidate]] = defaultdict(list)
+
+        # Choose the grouping key based on the priority mode
+        if self.priority_mode == self.PRIORITY_MODE_GLOBAL_KEY:
+            # Global-key priority mode: group by global_priority
+            for candidate in candidates:
+                global_priority = (
+                    candidate.key.global_priority
+                    if candidate.key and candidate.key.global_priority is not None
+                    else 999999
+                )
+                priority_groups[(global_priority,)].append(candidate)
+        else:
+            # Provider priority mode: group by (provider_priority, internal_priority)
+            for candidate in candidates:
+                key = (
+                    candidate.provider.provider_priority or 999999,
+                    candidate.key.internal_priority if candidate.key else 999999,
+                )
+                priority_groups[key].append(candidate)
+
+        result: List[ProviderCandidate] = []
+        for priority in sorted(priority_groups.keys()):
+            group = priority_groups[priority]
+            if len(group) > 1:
+                # Shuffle within the same priority group
+                shuffled = list(group)
+                random.shuffle(shuffled)
+                result.extend(shuffled)
+            else:
+                result.extend(group)
+
+        return result
+
     def _shuffle_keys_by_internal_priority(
         self,
         keys: List[ProviderAPIKey],
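
For readers who want to see the ordering in isolation, the sketch below reproduces the grouping-and-shuffle technique that the new _apply_load_balance helper applies, outside the scheduler. It is a minimal approximation under stated assumptions: the Provider, APIKey, and Candidate dataclasses here are simplified stand-ins invented for the example (the project's real ProviderCandidate carries more fields), and only the provider-priority branch is shown, not the global-key branch.

    # Standalone sketch (simplified, assumed types) of the load-balance ordering:
    # group candidates by a priority tuple, then shuffle inside each group so
    # equal-priority candidates rotate randomly while higher priority still wins.
    import random
    from collections import defaultdict
    from dataclasses import dataclass
    from typing import Dict, List, Optional, Tuple


    @dataclass
    class Provider:                      # stand-in, not the project's model
        name: str
        provider_priority: Optional[int] = None


    @dataclass
    class APIKey:                        # stand-in, not the project's model
        key_id: str
        internal_priority: Optional[int] = None


    @dataclass
    class Candidate:                     # stand-in for ProviderCandidate
        provider: Provider
        key: Optional[APIKey] = None


    def load_balance_order(candidates: List[Candidate]) -> List[Candidate]:
        groups: Dict[Tuple[int, int], List[Candidate]] = defaultdict(list)
        for candidate in candidates:
            group_key = (
                candidate.provider.provider_priority or 999999,
                candidate.key.internal_priority
                if candidate.key and candidate.key.internal_priority is not None
                else 999999,
            )
            groups[group_key].append(candidate)

        ordered: List[Candidate] = []
        for group_key in sorted(groups):
            group = list(groups[group_key])
            random.shuffle(group)        # random rotation within the same priority
            ordered.extend(group)
        return ordered


    if __name__ == "__main__":
        candidates = [
            Candidate(Provider("a", 1), APIKey("a-1", 1)),
            Candidate(Provider("b", 1), APIKey("b-1", 1)),
            Candidate(Provider("c", 2), APIKey("c-1", 1)),
        ]
        # "a" and "b" may come out in either order; "c" is always last.
        print([c.provider.name for c in load_balance_order(candidates)])

One design point worth noting from the patch itself: both priority modes share a single grouping dict by keying on tuples of different lengths, which is why the global-key branch groups on the one-element tuple (global_priority,) while the provider branch uses a two-element tuple.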