mirror of
https://github.com/fawney19/Aether.git
synced 2026-01-11 03:58:28 +08:00
feat: 添加负载均衡调度模式
- 新增 load_balance 调度模式,同优先级内随机轮换
- 前端支持三种调度模式切换:缓存亲和、负载均衡、固定顺序
This commit is contained in:
@@ -319,19 +319,6 @@
|
|||||||
<div class="flex items-center gap-2 pl-4 border-l border-border">
|
<div class="flex items-center gap-2 pl-4 border-l border-border">
|
||||||
<span class="text-xs text-muted-foreground">调度:</span>
|
<span class="text-xs text-muted-foreground">调度:</span>
|
||||||
<div class="flex gap-0.5 p-0.5 bg-muted/40 rounded-md">
|
<div class="flex gap-0.5 p-0.5 bg-muted/40 rounded-md">
|
||||||
<button
|
|
||||||
type="button"
|
|
||||||
class="px-2 py-1 text-xs font-medium rounded transition-all"
|
|
||||||
:class="[
|
|
||||||
schedulingMode === 'fixed_order'
|
|
||||||
? 'bg-primary text-primary-foreground shadow-sm'
|
|
||||||
: 'text-muted-foreground hover:text-foreground hover:bg-muted/50'
|
|
||||||
]"
|
|
||||||
title="严格按优先级顺序,不考虑缓存"
|
|
||||||
@click="schedulingMode = 'fixed_order'"
|
|
||||||
>
|
|
||||||
固定顺序
|
|
||||||
</button>
|
|
||||||
<button
|
<button
|
||||||
type="button"
|
type="button"
|
||||||
class="px-2 py-1 text-xs font-medium rounded transition-all"
|
class="px-2 py-1 text-xs font-medium rounded transition-all"
|
||||||
@@ -345,6 +332,32 @@
|
|||||||
>
|
>
|
||||||
缓存亲和
|
缓存亲和
|
||||||
</button>
|
</button>
|
||||||
|
<button
|
||||||
|
type="button"
|
||||||
|
class="px-2 py-1 text-xs font-medium rounded transition-all"
|
||||||
|
:class="[
|
||||||
|
schedulingMode === 'load_balance'
|
||||||
|
? 'bg-primary text-primary-foreground shadow-sm'
|
||||||
|
: 'text-muted-foreground hover:text-foreground hover:bg-muted/50'
|
||||||
|
]"
|
||||||
|
title="同优先级内随机轮换,不考虑缓存"
|
||||||
|
@click="schedulingMode = 'load_balance'"
|
||||||
|
>
|
||||||
|
负载均衡
|
||||||
|
</button>
|
||||||
|
<button
|
||||||
|
type="button"
|
||||||
|
class="px-2 py-1 text-xs font-medium rounded transition-all"
|
||||||
|
:class="[
|
||||||
|
schedulingMode === 'fixed_order'
|
||||||
|
? 'bg-primary text-primary-foreground shadow-sm'
|
||||||
|
: 'text-muted-foreground hover:text-foreground hover:bg-muted/50'
|
||||||
|
]"
|
||||||
|
title="严格按优先级顺序,不考虑缓存"
|
||||||
|
@click="schedulingMode = 'fixed_order'"
|
||||||
|
>
|
||||||
|
固定顺序
|
||||||
|
</button>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
@@ -445,7 +458,7 @@ const saving = ref(false)
|
|||||||
const editingKeyPriority = ref<Record<string, string | null>>({}) // format -> keyId
|
const editingKeyPriority = ref<Record<string, string | null>>({}) // format -> keyId
|
||||||
|
|
||||||
// 调度模式状态
|
// 调度模式状态
|
||||||
const schedulingMode = ref<'fixed_order' | 'cache_affinity'>('cache_affinity')
|
const schedulingMode = ref<'fixed_order' | 'load_balance' | 'cache_affinity'>('cache_affinity')
|
||||||
|
|
||||||
// 可用的 API 格式
|
// 可用的 API 格式
|
||||||
const availableFormats = computed(() => {
|
const availableFormats = computed(() => {
|
||||||
@@ -478,7 +491,11 @@ async function loadCurrentPriorityMode() {
|
|||||||
activeMainTab.value = currentMode === 'global_key' ? 'key' : 'provider'
|
activeMainTab.value = currentMode === 'global_key' ? 'key' : 'provider'
|
||||||
|
|
||||||
const currentSchedulingMode = schedulingResponse.value || 'cache_affinity'
|
const currentSchedulingMode = schedulingResponse.value || 'cache_affinity'
|
||||||
schedulingMode.value = currentSchedulingMode === 'fixed_order' ? 'fixed_order' : 'cache_affinity'
|
if (currentSchedulingMode === 'fixed_order' || currentSchedulingMode === 'load_balance' || currentSchedulingMode === 'cache_affinity') {
|
||||||
|
schedulingMode.value = currentSchedulingMode
|
||||||
|
} else {
|
||||||
|
schedulingMode.value = 'cache_affinity'
|
||||||
|
}
|
||||||
} catch {
|
} catch {
|
||||||
activeMainTab.value = 'provider'
|
activeMainTab.value = 'provider'
|
||||||
schedulingMode.value = 'cache_affinity'
|
schedulingMode.value = 'cache_affinity'
|
||||||
|
|||||||
67
src/services/cache/aware_scheduler.py
vendored
67
src/services/cache/aware_scheduler.py
vendored
@@ -121,11 +121,13 @@ class CacheAwareScheduler:
|
|||||||
PRIORITY_MODE_GLOBAL_KEY,
|
PRIORITY_MODE_GLOBAL_KEY,
|
||||||
}
|
}
|
||||||
# 调度模式常量
|
# 调度模式常量
|
||||||
SCHEDULING_MODE_FIXED_ORDER = "fixed_order" # 固定顺序模式
|
SCHEDULING_MODE_FIXED_ORDER = "fixed_order" # 固定顺序模式:严格按优先级,忽略缓存
|
||||||
SCHEDULING_MODE_CACHE_AFFINITY = "cache_affinity" # 缓存亲和模式
|
SCHEDULING_MODE_CACHE_AFFINITY = "cache_affinity" # 缓存亲和模式:优先缓存,同优先级哈希分散
|
||||||
|
SCHEDULING_MODE_LOAD_BALANCE = "load_balance" # 负载均衡模式:忽略缓存,同优先级随机轮换
|
||||||
ALLOWED_SCHEDULING_MODES = {
|
ALLOWED_SCHEDULING_MODES = {
|
||||||
SCHEDULING_MODE_FIXED_ORDER,
|
SCHEDULING_MODE_FIXED_ORDER,
|
||||||
SCHEDULING_MODE_CACHE_AFFINITY,
|
SCHEDULING_MODE_CACHE_AFFINITY,
|
||||||
|
SCHEDULING_MODE_LOAD_BALANCE,
|
||||||
}
|
}
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
@@ -680,8 +682,9 @@ class CacheAwareScheduler:
|
|||||||
f"(api_format={target_format.value}, model={model_name})"
|
f"(api_format={target_format.value}, model={model_name})"
|
||||||
)
|
)
|
||||||
|
|
||||||
# 4. 应用缓存亲和性排序(仅在缓存亲和模式下启用)
|
# 4. 根据调度模式应用不同的排序策略
|
||||||
if self.scheduling_mode == self.SCHEDULING_MODE_CACHE_AFFINITY:
|
if self.scheduling_mode == self.SCHEDULING_MODE_CACHE_AFFINITY:
|
||||||
|
# 缓存亲和模式:优先使用缓存的,同优先级内哈希分散
|
||||||
if affinity_key and candidates:
|
if affinity_key and candidates:
|
||||||
candidates = await self._apply_cache_affinity(
|
candidates = await self._apply_cache_affinity(
|
||||||
candidates=candidates,
|
candidates=candidates,
|
||||||
@@ -689,8 +692,13 @@ class CacheAwareScheduler:
|
|||||||
api_format=target_format,
|
api_format=target_format,
|
||||||
global_model_id=global_model_id,
|
global_model_id=global_model_id,
|
||||||
)
|
)
|
||||||
|
elif self.scheduling_mode == self.SCHEDULING_MODE_LOAD_BALANCE:
|
||||||
|
# 负载均衡模式:忽略缓存,同优先级内随机轮换
|
||||||
|
candidates = self._apply_load_balance(candidates)
|
||||||
|
for candidate in candidates:
|
||||||
|
candidate.is_cached = False
|
||||||
else:
|
else:
|
||||||
# 固定顺序模式:标记所有候选为非缓存
|
# 固定顺序模式:严格按优先级,忽略缓存
|
||||||
for candidate in candidates:
|
for candidate in candidates:
|
||||||
candidate.is_cached = False
|
candidate.is_cached = False
|
||||||
|
|
||||||
@@ -1163,6 +1171,57 @@ class CacheAwareScheduler:
|
|||||||
|
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
def _apply_load_balance(
    self, candidates: List[ProviderCandidate]
) -> List[ProviderCandidate]:
    """
    Load-balance mode: randomly rotate candidates that share a priority.

    Ordering rules:
    1. Candidates are grouped by priority. When ``self.priority_mode`` is
       ``PRIORITY_MODE_GLOBAL_KEY`` the group key is ``(global_priority,)``;
       otherwise it is ``(provider_priority, internal_priority)``.
    2. Groups are emitted in ascending key order and each group is
       shuffled with ``random.shuffle``.
    3. Cache affinity is not consulted here.

    A missing key, or a priority that is ``None``, is replaced by the
    sentinel 999999 so that it sorts after every explicit priority and so
    that group keys always stay mutually comparable.

    Args:
        candidates: Candidates to reorder. The list itself is not mutated.

    Returns:
        A new list with the reordered candidates, or ``candidates`` itself
        when it is empty.
    """
    if not candidates:
        return candidates

    from collections import defaultdict

    # Sentinel used for "no priority configured"; sorts last.
    unset_priority = 999999

    # Tuples are used as the key type in both modes so the groups can be
    # sorted uniformly.
    priority_groups: Dict[tuple, List[ProviderCandidate]] = defaultdict(list)

    if self.priority_mode == self.PRIORITY_MODE_GLOBAL_KEY:
        # Global-key-first mode: group by the key's global_priority.
        for candidate in candidates:
            api_key = candidate.key
            if api_key and api_key.global_priority is not None:
                group_key = (api_key.global_priority,)
            else:
                group_key = (unset_priority,)
            priority_groups[group_key].append(candidate)
    else:
        # Provider-first mode: group by (provider_priority, internal_priority).
        for candidate in candidates:
            api_key = candidate.key
            # A None internal_priority must not reach the tuple key:
            # sorting keys that mix None and int raises TypeError.
            if api_key and api_key.internal_priority is not None:
                internal_priority = api_key.internal_priority
            else:
                internal_priority = unset_priority
            group_key = (
                # NOTE(review): `or` also maps a provider_priority of 0 to
                # the sentinel; kept as-is to preserve existing behaviour.
                candidate.provider.provider_priority or unset_priority,
                internal_priority,
            )
            priority_groups[group_key].append(candidate)

    result: List[ProviderCandidate] = []
    for group_key in sorted(priority_groups):
        group = priority_groups[group_key]
        # Shuffling a single-element list is a no-op, so no special case
        # is needed. The group lists are local, so shuffling in place does
        # not touch the caller's list.
        random.shuffle(group)
        result.extend(group)

    return result
|
||||||
|
|
||||||
def _shuffle_keys_by_internal_priority(
|
def _shuffle_keys_by_internal_priority(
|
||||||
self,
|
self,
|
||||||
keys: List[ProviderAPIKey],
|
keys: List[ProviderAPIKey],
|
||||||
|
|||||||
Reference in New Issue
Block a user