From 1521ce5a96f98509db157cdc8291a28d632bc07a Mon Sep 17 00:00:00 2001
From: fawney19
Date: Thu, 8 Jan 2026 03:20:04 +0800
Subject: [PATCH] feat: add load-balance scheduling mode
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Add a load_balance scheduling mode that rotates candidates randomly within the same priority level
- The frontend can now switch between three scheduling modes: cache affinity, load balance, and fixed order
---
 .../components/PriorityManagementDialog.vue | 47 ++++++++-----
 src/services/cache/aware_scheduler.py       | 67 +++++++++++++++++--
 2 files changed, 95 insertions(+), 19 deletions(-)

diff --git a/frontend/src/features/providers/components/PriorityManagementDialog.vue b/frontend/src/features/providers/components/PriorityManagementDialog.vue
index 153aea2..7265279 100644
--- a/frontend/src/features/providers/components/PriorityManagementDialog.vue
+++ b/frontend/src/features/providers/components/PriorityManagementDialog.vue
@@ -319,19 +319,6 @@
 [template hunk body lost to markup stripping; only the "调度:" (scheduling) label survived. The hunk replaces the two-mode scheduling toggle with a three-mode selector: cache affinity, load balance, fixed order.]
@@ -445,7 +458,7 @@ const saving = ref(false)
 const editingKeyPriority = ref<Record<string, string>>({}) // format -> keyId
 
 // Scheduling mode state
-const schedulingMode = ref<'fixed_order' | 'cache_affinity'>('cache_affinity')
+const schedulingMode = ref<'fixed_order' | 'load_balance' | 'cache_affinity'>('cache_affinity')
 
 // Available API formats
 const availableFormats = computed(() => {
@@ -478,7 +491,11 @@ async function loadCurrentPriorityMode() {
     activeMainTab.value = currentMode === 'global_key' ? 'key' : 'provider'
 
     const currentSchedulingMode = schedulingResponse.value || 'cache_affinity'
-    schedulingMode.value = currentSchedulingMode === 'fixed_order' ? 'fixed_order' : 'cache_affinity'
+    if (currentSchedulingMode === 'fixed_order' || currentSchedulingMode === 'load_balance' || currentSchedulingMode === 'cache_affinity') {
+      schedulingMode.value = currentSchedulingMode
+    } else {
+      schedulingMode.value = 'cache_affinity'
+    }
   } catch {
     activeMainTab.value = 'provider'
     schedulingMode.value = 'cache_affinity'
diff --git a/src/services/cache/aware_scheduler.py b/src/services/cache/aware_scheduler.py
index 3257240..c38971f 100644
--- a/src/services/cache/aware_scheduler.py
+++ b/src/services/cache/aware_scheduler.py
@@ -121,11 +121,13 @@ class CacheAwareScheduler:
         PRIORITY_MODE_GLOBAL_KEY,
     }
     # Scheduling mode constants
-    SCHEDULING_MODE_FIXED_ORDER = "fixed_order"  # Fixed-order mode
-    SCHEDULING_MODE_CACHE_AFFINITY = "cache_affinity"  # Cache-affinity mode
+    SCHEDULING_MODE_FIXED_ORDER = "fixed_order"  # Fixed order: strict priority order, ignores cache
+    SCHEDULING_MODE_CACHE_AFFINITY = "cache_affinity"  # Cache affinity: prefer cached, hash-spread within a priority
+    SCHEDULING_MODE_LOAD_BALANCE = "load_balance"  # Load balance: ignore cache, random rotation within a priority
     ALLOWED_SCHEDULING_MODES = {
         SCHEDULING_MODE_FIXED_ORDER,
         SCHEDULING_MODE_CACHE_AFFINITY,
+        SCHEDULING_MODE_LOAD_BALANCE,
     }
 
     def __init__(
@@ -680,8 +682,9 @@ class CacheAwareScheduler:
                 f"(api_format={target_format.value}, model={model_name})"
             )
 
-        # 4. Apply cache-affinity ordering (only enabled in cache-affinity mode)
+        # 4. Apply the ordering strategy for the configured scheduling mode
         if self.scheduling_mode == self.SCHEDULING_MODE_CACHE_AFFINITY:
+            # Cache-affinity mode: prefer cached candidates, hash-spread within the same priority
             if affinity_key and candidates:
                 candidates = await self._apply_cache_affinity(
                     candidates=candidates,
@@ -689,8 +692,13 @@ class CacheAwareScheduler:
                     api_format=target_format,
                     global_model_id=global_model_id,
                 )
+        elif self.scheduling_mode == self.SCHEDULING_MODE_LOAD_BALANCE:
+            # Load-balance mode: ignore cache, rotate randomly within the same priority
+            candidates = self._apply_load_balance(candidates)
+            for candidate in candidates:
+                candidate.is_cached = False
         else:
-            # Fixed-order mode: mark all candidates as non-cached
+            # Fixed-order mode: strict priority order, ignore cache
            for candidate in candidates:
                 candidate.is_cached = False
 
@@ -1163,6 +1171,57 @@ class CacheAwareScheduler:
 
         return result
 
+    def _apply_load_balance(
+        self, candidates: List[ProviderCandidate]
+    ) -> List[ProviderCandidate]:
+        """
+        Load-balance mode: random rotation within the same priority level.
+
+        Ordering logic:
+        1. Group by priority (provider_priority + internal_priority, or global_priority)
+        2. Shuffle each same-priority group randomly
+        3. Ignore cache affinity entirely
+        """
+        if not candidates:
+            return candidates
+
+        from collections import defaultdict
+
+        # Use tuples as the common key type so both priority modes share one grouping dict
+        priority_groups: Dict[tuple, List[ProviderCandidate]] = defaultdict(list)
+
+        # Choose the grouping key based on the priority mode
+        if self.priority_mode == self.PRIORITY_MODE_GLOBAL_KEY:
+            # Global-key priority mode: group by global_priority
+            for candidate in candidates:
+                global_priority = (
+                    candidate.key.global_priority
+                    if candidate.key and candidate.key.global_priority is not None
+                    else 999999
+                )
+                priority_groups[(global_priority,)].append(candidate)
+        else:
+            # Provider priority mode: group by (provider_priority, internal_priority)
+            for candidate in candidates:
+                key = (
+                    candidate.provider.provider_priority or 999999,
+                    candidate.key.internal_priority if candidate.key else 999999,
+                )
+                priority_groups[key].append(candidate)
+
+        result: List[ProviderCandidate] = []
+        for priority in sorted(priority_groups.keys()):
+            group = priority_groups[priority]
+            if len(group) > 1:
+                # Shuffle within the same priority group
+                shuffled = list(group)
+                random.shuffle(shuffled)
+                result.extend(shuffled)
+            else:
+                result.extend(group)
+
+        return result
+
     def _shuffle_keys_by_internal_priority(
         self,
         keys: List[ProviderAPIKey],
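
For readers who want to see the ordering in isolation, the sketch below reproduces the grouping-and-shuffle technique that the new _apply_load_balance helper applies, outside the scheduler. It is a minimal approximation under stated assumptions: the Provider, APIKey, and Candidate dataclasses here are simplified stand-ins invented for the example (the project's real ProviderCandidate carries more fields), and only the provider-priority branch is shown, not the global-key branch.

    # Standalone sketch (simplified, assumed types) of the load-balance ordering:
    # group candidates by a priority tuple, then shuffle inside each group so
    # equal-priority candidates rotate randomly while higher priority still wins.
    import random
    from collections import defaultdict
    from dataclasses import dataclass
    from typing import Dict, List, Optional, Tuple


    @dataclass
    class Provider:                      # stand-in, not the project's model
        name: str
        provider_priority: Optional[int] = None


    @dataclass
    class APIKey:                        # stand-in, not the project's model
        key_id: str
        internal_priority: Optional[int] = None


    @dataclass
    class Candidate:                     # stand-in for ProviderCandidate
        provider: Provider
        key: Optional[APIKey] = None


    def load_balance_order(candidates: List[Candidate]) -> List[Candidate]:
        groups: Dict[Tuple[int, int], List[Candidate]] = defaultdict(list)
        for candidate in candidates:
            group_key = (
                candidate.provider.provider_priority or 999999,
                candidate.key.internal_priority
                if candidate.key and candidate.key.internal_priority is not None
                else 999999,
            )
            groups[group_key].append(candidate)

        ordered: List[Candidate] = []
        for group_key in sorted(groups):
            group = list(groups[group_key])
            random.shuffle(group)        # random rotation within the same priority
            ordered.extend(group)
        return ordered


    if __name__ == "__main__":
        candidates = [
            Candidate(Provider("a", 1), APIKey("a-1", 1)),
            Candidate(Provider("b", 1), APIKey("b-1", 1)),
            Candidate(Provider("c", 2), APIKey("c-1", 1)),
        ]
        # "a" and "b" may come out in either order; "c" is always last.
        print([c.provider.name for c in load_balance_order(candidates)])

One design point worth noting from the patch itself: both priority modes share a single grouping dict by keying on tuples of different lengths, which is why the global-key branch groups on the one-element tuple (global_priority,) while the provider branch uses a two-element tuple.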