增强LLM运行分析与按维度统计视图

2026-04-30 17:59:03 +08:00
parent 9a52eb33bf
commit ae208d7b84
4 changed files with 517 additions and 4 deletions
--- a/admin/dashboard/blueprints/system.py
+++ b/admin/dashboard/blueprints/system.py
@@ -395,6 +395,46 @@ def _extract_ai_runtime_snapshot() -> dict:
    return snapshot


+def _build_llm_runtime_analytics_payload() -> dict:
+    """Build the recent-window LLM analytics payload.
+
+    Why this is factored out into its own function:
+    1. The home-page AI card only shows a summary, whereas the `system_llm` page needs finer-grained grouped tables;
+    2. Both rely on the same runtime snapshot, so the scene/backend/provider/model aggregation logic is not scattered across several endpoints;
+    3. Phase one delivers "recent-window analysis" so admins can quickly spot slow scenes, failing models and abnormal backends.
+    """
+    runtime_breakdown = UnifiedLLMClient.get_runtime_breakdown() or {}  # falsy result -> empty dict
+    overview_snapshot = _extract_ai_runtime_snapshot()
+    catalog_summary = _extract_llm_catalog_summary()
+
+    return {
+        "overview": {
+            "window_size": _safe_int(runtime_breakdown.get("window_size")),
+            "total_calls": _safe_int(runtime_breakdown.get("total_calls")),
+            "success_calls": _safe_int(runtime_breakdown.get("success_calls")),
+            "failed_calls": _safe_int(runtime_breakdown.get("failed_calls")),
+            "success_rate": _safe_float(runtime_breakdown.get("success_rate")),
+            "avg_latency_ms": _safe_float(runtime_breakdown.get("avg_latency_ms")),
+            "last_error": str(runtime_breakdown.get("last_error") or "").strip(),
+            "status": str(overview_snapshot.get("status") or "warning").strip(),
+            "summary": str(overview_snapshot.get("summary") or "").strip(),
+            "last_call": dict(runtime_breakdown.get("last_call") or {}),  # new dict; empty when missing/falsy
+            "provider_count": _safe_int(catalog_summary.get("provider_count")),
+            "scene_count": _safe_int(catalog_summary.get("scene_count")),
+            "target_count": _safe_int(catalog_summary.get("target_count")),
+            "default_scene": str(catalog_summary.get("default_scene") or "").strip(),
+            "default_backend": str(catalog_summary.get("default_backend") or "").strip(),
+            "has_routing": bool(catalog_summary.get("has_routing")),
+        },
+        # Keep the raw recent-window rows here so a future "last 10 calls" list can reuse them directly.
+        "recent_rows": runtime_breakdown.get("rows", []) or [],
+        "by_scene": runtime_breakdown.get("by_scene", []) or [],
+        "by_backend": runtime_breakdown.get("by_backend", []) or [],
+        "by_provider": runtime_breakdown.get("by_provider", []) or [],
+        "by_model": runtime_breakdown.get("by_model", []) or [],
+    }
+
+
 def _extract_scheduler_runtime_snapshot() -> dict:
    """聚合 async_job 运行态，生成首页任务调度摘要。

@@ -1115,6 +1155,26 @@ def get_system_llm_config():
        return jsonify({"success": False, "message": str(e)}), 500


+@system_bp.route('/api/system/llm_runtime_analytics', methods=['GET'])
+@login_required
+def get_system_llm_runtime_analytics():
+    """Return the recent-window LLM analytics result.
+
+    This endpoint sends no active health probes and does no persisted cost accounting; it only consumes the recent-window metrics already recorded by the unified client:
+    1. Refreshing the admin page must not put extra load on the AI service;
+    2. First get "success rate and latency by scene/backend/model" solid;
+    3. Reserve the API shape for a future real token cost center.
+    """
+    try:
+        return jsonify({
+            "success": True,
+            "data": _build_llm_runtime_analytics_payload(),
+        })
+    except Exception as e:  # any failure while building the payload is reported as JSON
+        logger.error(f"读取 LLM 运行分析失败: {e}")
+        return jsonify({"success": False, "message": str(e)}), 500  # HTTP 500 with the exception text
+
+
+
@system_bp.route('/api/system/llm_config', methods=['POST'])
@login_required
 def update_system_llm_config():