增强LLM运行分析与按维度统计视图

2026-04-30 17:59:03 +08:00
parent 9a52eb33bf
commit ae208d7b84
4 changed files with 517 additions and 4 deletions
--- a/utils/ai/unified_llm.py
+++ b/utils/ai/unified_llm.py
@@ -109,6 +109,107 @@ class UnifiedLLMClient:
            "last_error": last_error,
        }

+    @staticmethod
+    def _normalize_runtime_dimension_value(value: Any, fallback_label: str = "(未标记)") -> str:
+        """Return ``value`` as a stripped string, or ``fallback_label`` when it is blank.
+
+        Falsy values (``None``, ``""``, ``0``, ``False``) and whitespace-only text all map to
+        the placeholder, so a record missing its dimension stays visible under an explicit
+        bucket instead of producing an empty key or being dropped from the statistics.
+        """
+        normalized = ("" if not value else str(value)).strip()
+        return normalized if normalized else fallback_label
+
+    @classmethod
+    def _build_runtime_breakdown_rows(
+        cls,
+        rows: List[Dict[str, Any]],
+        dimension: str,
+        fallback_label: str = "(未标记)",
+    ) -> List[Dict[str, Any]]:
+        """Aggregate in-memory call records into one summary row per ``dimension`` value.
+
+        Args:
+            rows: Call records, read via ``.get``; assumed oldest-first (the caller passes a
+                snapshot of the metrics deque), so the last record of a group is the latest.
+            dimension: Record key to group by.
+            fallback_label: Group key used when a record's dimension value is blank.
+
+        Returns:
+            Per-group dicts (call counts, ``avg_latency_ms``, ``success_rate`` in percent,
+            latest call time / trace id, latest failure text), sorted by most calls, then
+            most failures, then key.
+        """
+        buckets: Dict[str, Dict[str, Any]] = {}
+        for record in rows:
+            bucket_key = cls._normalize_runtime_dimension_value(record.get(dimension), fallback_label)
+            if bucket_key not in buckets:
+                buckets[bucket_key] = {
+                    "key": bucket_key,
+                    "dimension": dimension,
+                    "total_calls": 0,
+                    "success_calls": 0,
+                    "failed_calls": 0,
+                    "latency_sum_ms": 0.0,
+                    "avg_latency_ms": 0.0,
+                    "success_rate": 0.0,
+                    "last_call_at": "",
+                    "last_trace_id": "",
+                    "last_error": "",
+                }
+            bucket = buckets[bucket_key]
+
+            succeeded = bool(record.get("success"))
+            bucket["total_calls"] += 1
+            bucket["success_calls" if succeeded else "failed_calls"] += 1
+            bucket["latency_sum_ms"] += float(record.get("latency_ms") or 0.0)
+
+            # Later records are newer, so simply overwrite the "latest call" fields.
+            bucket["last_call_at"] = str(record.get("timestamp") or "").strip()
+            bucket["last_trace_id"] = str(record.get("trace_id") or "").strip()
+            error = record.get("error")
+            if error and not succeeded:
+                # Only failures update this; a later success keeps the previous error text.
+                bucket["last_error"] = str(error).strip()
+
+        def sort_key(summary: Dict[str, Any]) -> tuple:
+            return (
+                -int(summary.get("total_calls") or 0),
+                -int(summary.get("failed_calls") or 0),
+                str(summary.get("key") or ""),
+            )
+
+        summaries = list(buckets.values())
+        for summary in summaries:
+            calls = int(summary.get("total_calls") or 0)
+            wins = int(summary.get("success_calls") or 0)
+            latency_total = summary.pop("latency_sum_ms", 0.0)
+            summary["avg_latency_ms"] = round(latency_total / calls, 2) if calls else 0.0
+            summary["success_rate"] = round(wins / calls * 100, 2) if calls else 0.0
+        return sorted(summaries, key=sort_key)
+
+    @classmethod
+    def get_runtime_breakdown(cls) -> Dict[str, Any]:
+        """Return the runtime snapshot plus per-dimension breakdowns of the recent-call window.
+
+        The result merges ``get_runtime_snapshot()`` with ``rows`` (copies of the raw records)
+        and the ``by_scene`` / ``by_backend`` / ``by_provider`` / ``by_model`` summaries.
+        """
+        with cls._runtime_lock:
+            # Copy each record, not just the container: ``rows`` is handed to callers, and
+            # sharing the live dicts would let them mutate the metrics store outside the lock.
+            rows = [dict(row) for row in cls._runtime_metrics]
+
+        # NOTE(review): snapshot is read after the lock is released; it may be newer than rows.
+        return {
+            **cls.get_runtime_snapshot(),
+            "rows": rows,
+            "by_scene": cls._build_runtime_breakdown_rows(rows, "scene"),
+            "by_backend": cls._build_runtime_breakdown_rows(rows, "backend"),
+            "by_provider": cls._build_runtime_breakdown_rows(rows, "provider"),
+            "by_model": cls._build_runtime_breakdown_rows(rows, "model"),
+        }
+
    def is_available(self) -> bool:
        if not self.enabled:
            return False