# Aether/src/core/metrics.py

"""
Prometheus metrics for monitoring
"""

from prometheus_client import Counter, Gauge, Histogram

# Distribution of how long a concurrency slot stays occupied.
# Labelled by key_id and exception; buckets run from 0.1 s up to 600 s
# (10 minutes).
concurrency_slot_duration_seconds = Histogram(
    name="concurrency_slot_duration_seconds",
    documentation="Duration of concurrency slot occupation in seconds",
    labelnames=("key_id", "exception"),
    buckets=(0.1, 0.5, 1, 5, 10, 30, 60, 120, 300, 600),
)

# Number of concurrency slot releases, labelled by key_id and exception.
concurrency_slot_release_total = Counter(
    name="concurrency_slot_release_total",
    documentation="Total number of concurrency slot releases",
    labelnames=("key_id", "exception"),
)

# Number of concurrency slots currently in use, labelled by key_id.
concurrency_slots_in_use = Gauge(
    name="concurrency_slots_in_use",
    documentation="Current number of concurrency slots in use",
    labelnames=("key_id",),
)

# Distribution of streaming request durations.
# Labelled by key_id and status; buckets run from 1 s up to 1800 s
# (30 minutes).
streaming_request_duration_seconds = Histogram(
    name="streaming_request_duration_seconds",
    documentation="Duration of streaming requests in seconds",
    labelnames=("key_id", "status"),
    buckets=(1, 5, 10, 30, 60, 120, 300, 600, 1800),
)

# Total request count, broken down by request type and outcome.
# Intended label values (per the original note on this metric):
#   type:   streaming / non-streaming
#   status: success / error
request_total = Counter(
    name="request_total",
    documentation="Total number of requests",
    labelnames=("type", "status"),
)

# Health monitoring.
# Unlabelled gauge: count of provider keys whose circuit breaker is open.
health_open_circuits = Gauge(
    name="health_open_circuits",
    documentation="Number of provider keys currently in circuit breaker open state",
)

# Model mapping resolution.
# Number of model mapping resolutions. Intended label values (per the
# original note on this metric):
#   method:    direct_match, provider_model_name, mapping, not_found
#   cache_hit: true, false
model_mapping_resolution_total = Counter(
    name="model_mapping_resolution_total",
    documentation="Total number of model mapping resolutions",
    labelnames=("method", "cache_hit"),
)

# Distribution of model mapping resolution time, labelled by method.
# Buckets run from 0.001 s (1 ms) up to 1 s.
model_mapping_resolution_duration_seconds = Histogram(
    name="model_mapping_resolution_duration_seconds",
    documentation="Duration of model mapping resolution in seconds",
    labelnames=("method",),
    buckets=(0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0),
)

# Unlabelled counter of detected mapping conflicts, i.e. one name that
# maps to more than one GlobalModel.
model_mapping_conflict_total = Counter(
    name="model_mapping_conflict_total",
    documentation="Total number of mapping conflicts detected (same name maps to multiple GlobalModels)",
)