增强LLM运行分析与按维度统计视图
This commit is contained in:
@@ -109,6 +109,107 @@ class UnifiedLLMClient:
|
||||
"last_error": last_error,
|
||||
}
|
||||
|
||||
@staticmethod
|
||||
def _normalize_runtime_dimension_value(value: Any, fallback_label: str = "(未标记)") -> str:
|
||||
"""把分组维度统一格式化,避免后台表格里出现空白 key。
|
||||
|
||||
这里保留一个显式的“未标记”占位,有两个目的:
|
||||
1. 便于管理员快速发现是哪个插件/场景没有正确传 scene、backend、model;
|
||||
2. 比直接丢弃空值更安全,避免分析数据被“悄悄吃掉”。
|
||||
"""
|
||||
text = str(value or "").strip()
|
||||
return text or fallback_label
|
||||
|
||||
@classmethod
def _build_runtime_breakdown_rows(
    cls,
    rows: List[Dict[str, Any]],
    dimension: str,
    fallback_label: str = "(未标记)",
) -> List[Dict[str, Any]]:
    """Aggregate recent-window call records by the given dimension.

    Design notes:

    1. Only the in-memory records passed in are aggregated; no new table is
       introduced and no persistent cost settlement is done here.
    2. The first-phase goal is to let administrators see which kind of call
       is slower or fails more often.
    3. Token/amount figures can be moved into a persistent table later, once
       cost governance is confirmed to need it.

    Args:
        rows: Call records; each is read via ``.get`` for ``dimension``,
            ``success``, ``latency_ms``, ``timestamp``, ``trace_id`` and
            ``error``.
        dimension: Record key to group by.
        fallback_label: Group label used when a record has no usable value
            for ``dimension``.

    Returns:
        One summary dict per group, ordered by total calls (descending),
        then failed calls (descending), then group key (ascending).
    """
    buckets: Dict[str, Dict[str, Any]] = {}

    for record in rows:
        label = cls._normalize_runtime_dimension_value(record.get(dimension), fallback_label)

        bucket = buckets.get(label)
        if bucket is None:
            # First record of this group: start from zeroed counters.
            bucket = {
                "key": label,
                "dimension": dimension,
                "total_calls": 0,
                "success_calls": 0,
                "failed_calls": 0,
                "latency_sum_ms": 0.0,
                "avg_latency_ms": 0.0,
                "success_rate": 0.0,
                "last_call_at": "",
                "last_trace_id": "",
                "last_error": "",
            }
            buckets[label] = bucket

        succeeded = bool(record.get("success"))
        bucket["total_calls"] += 1
        if succeeded:
            bucket["success_calls"] += 1
        else:
            bucket["failed_calls"] += 1

        bucket["latency_sum_ms"] += float(record.get("latency_ms") or 0.0)

        # Records are assumed to arrive in chronological order (the caller
        # passes a copy of an append-only deque), so a later record of the
        # same group is the more recent call; overwriting is cheap and is
        # enough for the recent-window analysis table.
        bucket["last_call_at"] = str(record.get("timestamp") or "").strip()
        bucket["last_trace_id"] = str(record.get("trace_id") or "").strip()
        # Only a failed call that carries an error replaces the remembered
        # error; a later success does not clear it.
        if not succeeded and record.get("error"):
            bucket["last_error"] = str(record.get("error") or "").strip()

    # Turn the running sums into the derived metrics and drop the
    # intermediate accumulator from the output rows.
    for bucket in buckets.values():
        call_count = int(bucket.get("total_calls") or 0)
        ok_count = int(bucket.get("success_calls") or 0)
        if call_count:
            bucket["avg_latency_ms"] = round((bucket.get("latency_sum_ms", 0.0) / call_count), 2)
            bucket["success_rate"] = round((ok_count / call_count) * 100, 2)
        else:
            bucket["avg_latency_ms"] = 0.0
            bucket["success_rate"] = 0.0
        bucket.pop("latency_sum_ms", None)

    def _ranking(entry: Dict[str, Any]):
        # Busiest groups first, then the most failure-prone, then by name.
        return (
            -int(entry.get("total_calls") or 0),
            -int(entry.get("failed_calls") or 0),
            str(entry.get("key") or ""),
        )

    summary_rows: List[Dict[str, Any]] = list(buckets.values())
    summary_rows.sort(key=_ranking)
    return summary_rows
|
||||
|
||||
@classmethod
def get_runtime_breakdown(cls) -> Dict[str, Any]:
    """Return the multi-dimension aggregate analysis of recent LLM calls.

    The structure is meant for the first phase of the admin "AI cost and
    strategy center":

    1. Aggregation is done per scene / backend / provider / model.
    2. It focuses on success rate, average latency, failure count and the
       most recent error.
    3. No long-term retention is promised; it only serves analysis of the
       recent window.

    Returns:
        The mapping from ``get_runtime_snapshot()`` extended with ``rows``
        (the copied call records) and the ``by_scene``, ``by_backend``,
        ``by_provider`` and ``by_model`` breakdown lists.
    """
    # Copy the records under the lock so aggregation works on a stable list.
    with cls._runtime_lock:
        recent_rows = list(cls._runtime_metrics)

    # NOTE(review): the snapshot is taken after the lock is released, so it
    # may include calls recorded after the copy above - confirm acceptable.
    breakdown: Dict[str, Any] = {**cls.get_runtime_snapshot()}
    breakdown["rows"] = recent_rows
    breakdown["by_scene"] = cls._build_runtime_breakdown_rows(recent_rows, "scene")
    breakdown["by_backend"] = cls._build_runtime_breakdown_rows(recent_rows, "backend")
    breakdown["by_provider"] = cls._build_runtime_breakdown_rows(recent_rows, "provider")
    breakdown["by_model"] = cls._build_runtime_breakdown_rows(recent_rows, "model")
    return breakdown
|
||||
|
||||
def is_available(self) -> bool:
|
||||
if not self.enabled:
|
||||
return False
|
||||
|
||||
Reference in New Issue
Block a user