diff --git a/admin/dashboard/blueprints/system.py b/admin/dashboard/blueprints/system.py
index b93d685..4b10997 100644
--- a/admin/dashboard/blueprints/system.py
+++ b/admin/dashboard/blueprints/system.py
@@ -395,6 +395,46 @@ def _extract_ai_runtime_snapshot() -> dict:
     return snapshot
 
 
+def _build_llm_runtime_analytics_payload() -> dict:
+    """Build the recent-window LLM analytics payload.
+
+    Why this lives in its own function:
+    1. The home AI card only needs a summary, while the `system_llm` page needs finer-grained grouped tables;
+    2. Both rely on the same runtime snapshot, so scene/backend/provider/model aggregation is not scattered across endpoints;
+    3. Phase one is "recent-window analysis", letting admins quickly spot slow scenes, failing models and abnormal backends.
+    """
+    runtime_breakdown = UnifiedLLMClient.get_runtime_breakdown() or {}
+    overview_snapshot = _extract_ai_runtime_snapshot()
+    catalog_summary = _extract_llm_catalog_summary()
+
+    return {
+        "overview": {
+            "window_size": _safe_int(runtime_breakdown.get("window_size")),
+            "total_calls": _safe_int(runtime_breakdown.get("total_calls")),
+            "success_calls": _safe_int(runtime_breakdown.get("success_calls")),
+            "failed_calls": _safe_int(runtime_breakdown.get("failed_calls")),
+            "success_rate": _safe_float(runtime_breakdown.get("success_rate")),
+            "avg_latency_ms": _safe_float(runtime_breakdown.get("avg_latency_ms")),
+            "last_error": str(runtime_breakdown.get("last_error") or "").strip(),
+            "status": str(overview_snapshot.get("status") or "warning").strip(),
+            "summary": str(overview_snapshot.get("summary") or "").strip(),
+            "last_call": dict(runtime_breakdown.get("last_call") or {}),
+            "provider_count": _safe_int(catalog_summary.get("provider_count")),
+            "scene_count": _safe_int(catalog_summary.get("scene_count")),
+            "target_count": _safe_int(catalog_summary.get("target_count")),
+            "default_scene": str(catalog_summary.get("default_scene") or "").strip(),
+            "default_backend": str(catalog_summary.get("default_backend") or "").strip(),
+            "has_routing": bool(catalog_summary.get("has_routing")),
+        },
+        # Keep the raw recent-window rows so a future "last 10 calls" list can reuse them directly.
+        "recent_rows": runtime_breakdown.get("rows", []) or [],
+        "by_scene": runtime_breakdown.get("by_scene", []) or [],
+        "by_backend": runtime_breakdown.get("by_backend", []) or [],
+        "by_provider": runtime_breakdown.get("by_provider", []) or [],
+        "by_model": runtime_breakdown.get("by_model", []) or [],
+    }
+
+
 def _extract_scheduler_runtime_snapshot() -> dict:
     """聚合 async_job 运行态,生成首页任务调度摘要。
 
@@ -1115,6 +1155,27 @@ def get_system_llm_config():
         return jsonify({"success": False, "message": str(e)}), 500
 
 
+@system_bp.route('/api/system/llm_runtime_analytics', methods=['GET'])
+@login_required
+def get_system_llm_runtime_analytics():
+    """Return the recent-window LLM analytics result.
+
+    No probe request is issued and no persistent cost accounting is done here; only the recent-window records already captured by the unified client are consumed:
+    1. Avoid dashboard page refreshes putting extra load on the AI service;
+    2. First make "success rate and latency by scene/backend/model" solid;
+    3. Keep the endpoint shape ready for a future real token cost center.
+    """
+    try:
+        return jsonify({
+            "success": True,
+            "data": _build_llm_runtime_analytics_payload(),
+        })
+    except Exception as e:
+        # logger.exception logs at ERROR level like before, but also records the traceback.
+        logger.exception(f"读取 LLM 运行分析失败: {e}")
+        return jsonify({"success": False, "message": str(e)}), 500
+
+
 @system_bp.route('/api/system/llm_config', methods=['POST'])
 @login_required
 def update_system_llm_config():
diff --git a/admin/dashboard/templates/system_llm.html b/admin/dashboard/templates/system_llm.html
index 3604102..53dc82a 100644
--- a/admin/dashboard/templates/system_llm.html
+++ b/admin/dashboard/templates/system_llm.html
@@ -8,14 +8,181 @@
LLM Catalog

LLM目录配置

-

按 Provider 模板、Dify 应用、Scene 绑定三层维护,减少重复配置和切换成本。

+

按 Provider 模板、Dify 应用、Scene 绑定三层维护,并结合最近窗口运行分析判断哪条 AI 路由更慢、更容易失败。

- 刷新 + 刷新 保存配置
+ +
+
+

AI运行分析

+

基于统一 LLM 客户端最近窗口埋点做被动观测,不额外发起探活请求。

+
+
+ 窗口容量:{% raw %}{{ runtimeAnalytics.overview.window_size || 0 }}{% endraw %} + 默认场景:{% raw %}{{ runtimeAnalytics.overview.default_scene || '-' }}{% endraw %} + 默认目标:{% raw %}{{ runtimeAnalytics.overview.default_backend || '-' }}{% endraw %} +
+
+ +
+
+
最近调用
+
{% raw %}{{ runtimeAnalytics.overview.total_calls || 0 }}{% endraw %}
+
成功 {% raw %}{{ runtimeAnalytics.overview.success_calls || 0 }}{% endraw %} / 失败 {% raw %}{{ runtimeAnalytics.overview.failed_calls || 0 }}{% endraw %}
+
+
+
成功率
+
{% raw %}{{ formatPercent(runtimeAnalytics.overview.success_rate) }}{% endraw %}
+
按最近窗口实时汇总
+
+
+
平均耗时
+
{% raw %}{{ formatLatency(runtimeAnalytics.overview.avg_latency_ms) }}{% endraw %}
+
用于快速识别慢场景
+
+
+
路由规模
+
{% raw %}{{ runtimeAnalytics.overview.scene_count || 0 }}{% endraw %}
+
场景数 / 目标数 {% raw %}{{ runtimeAnalytics.overview.target_count || 0 }}{% endraw %}
+
+
+ +
+
+ + {% raw %}{{ statusText(runtimeAnalytics.overview.status) }}{% endraw %} + + {% raw %}{{ runtimeAnalytics.overview.summary || '最近窗口内暂无统一 LLM 调用记录' }}{% endraw %} +
+
+ Provider 模板:{% raw %}{{ runtimeAnalytics.overview.provider_count || 0 }}{% endraw %} + 最近场景:{% raw %}{{ runtimeAnalytics.overview.last_call.scene || '-' }}{% endraw %} + 最近后端:{% raw %}{{ runtimeAnalytics.overview.last_call.backend || '-' }}{% endraw %} + 最近模型:{% raw %}{{ runtimeAnalytics.overview.last_call.model || '-' }}{% endraw %} + 最近时间:{% raw %}{{ runtimeAnalytics.overview.last_call.timestamp || '-' }}{% endraw %} +
+
+ 最近错误: + {% raw %}{{ runtimeAnalytics.overview.last_error }}{% endraw %} +
+
+ +
+ +
+
+

按场景统计

+

定位哪个业务场景最常调用、最容易失败。

+
+
+ + + + + + + + + + + + + + +
+ + +
+
+

按后端统计

+

观察 backend 层是否存在集中失败或慢请求。

+
+
+ + + + + + + + + + + + + + +
+ + +
+
+

按 Provider 统计

+

区分 Dify 与 OpenAI Compatible 等不同接入形态的表现。

+
+
+ + + + + + + + + + + + + + +
+ + +
+
+

按模型统计

+

帮助判断是否需要按场景切换模型或做降级策略。

+
+
+ + + + + + + + + + + + + + +
+
+
+
@@ -215,6 +382,30 @@ currentView: '17', configPath: '', topologyRows: [], + runtimeAnalyticsLoading: false, + runtimeAnalytics: { + overview: { + window_size: 0, + total_calls: 0, + success_calls: 0, + failed_calls: 0, + success_rate: 0, + avg_latency_ms: 0, + last_error: '', + status: 'warning', + summary: '', + last_call: {}, + provider_count: 0, + scene_count: 0, + target_count: 0, + default_scene: '', + default_backend: '' + }, + by_scene: [], + by_backend: [], + by_provider: [], + by_model: [] + }, catalog: { default_scene: '', providers: [], @@ -240,12 +431,45 @@ }, mounted() { this.currentView = '17'; - this.loadLlmConfig(); + this.reloadPageData(); }, methods: { newUid() { return `${Date.now()}_${Math.random().toString(36).slice(2, 8)}`; }, + // 统一刷新配置与运行分析,避免管理员点一次“刷新”只能看到半套信息。 + async reloadPageData() { + await Promise.all([ + this.loadLlmConfig(), + this.loadRuntimeAnalytics() + ]); + }, + statusTagType(status) { + if (status === 'healthy') { + return 'success'; + } + if (status === 'danger') { + return 'danger'; + } + return 'warning'; + }, + statusText(status) { + if (status === 'healthy') { + return '运行正常'; + } + if (status === 'danger') { + return '需要立即处理'; + } + return '需要关注'; + }, + formatPercent(value) { + const numeric = Number(value || 0); + return `${numeric.toFixed(2)}%`; + }, + formatLatency(value) { + const numeric = Number(value || 0); + return `${numeric.toFixed(2)} ms`; + }, // Provider 模板:只放公共字段,避免 Dify 每个应用重复填写。 newProvider() { return { @@ -396,6 +620,46 @@ } return this.difyAppNameOptions; }, + async loadRuntimeAnalytics() { + this.runtimeAnalyticsLoading = true; + try { + const response = await axios.get('/api/system/llm_runtime_analytics'); + if (!response.data.success) { + this.$message.error(response.data.message || '读取 AI 运行分析失败'); + return; + } + const data = response.data.data || {}; + const overview = data.overview || {}; + // 这里做前端兜底结构归一化,避免后端未来新增字段时影响当前页面渲染。 + this.runtimeAnalytics = { + overview: { + window_size: 
overview.window_size || 0, + total_calls: overview.total_calls || 0, + success_calls: overview.success_calls || 0, + failed_calls: overview.failed_calls || 0, + success_rate: overview.success_rate || 0, + avg_latency_ms: overview.avg_latency_ms || 0, + last_error: overview.last_error || '', + status: overview.status || 'warning', + summary: overview.summary || '', + last_call: overview.last_call || {}, + provider_count: overview.provider_count || 0, + scene_count: overview.scene_count || 0, + target_count: overview.target_count || 0, + default_scene: overview.default_scene || '', + default_backend: overview.default_backend || '' + }, + by_scene: data.by_scene || [], + by_backend: data.by_backend || [], + by_provider: data.by_provider || [], + by_model: data.by_model || [] + }; + } catch (error) { + this.$message.error(error.response?.data?.message || '读取 AI 运行分析失败'); + } finally { + this.runtimeAnalyticsLoading = false; + } + }, async loadLlmConfig() { try { const response = await axios.get('/api/system/llm_config'); @@ -474,7 +738,7 @@ const response = await axios.post('/api/system/llm_config', payload); if (response.data.success) { this.$message.success(response.data.message || '保存成功'); - this.loadLlmConfig(); + this.reloadPageData(); } else { this.$message.error(response.data.message || '保存失败'); } @@ -513,6 +777,86 @@ gap: 8px; flex-wrap: wrap; } + .runtime-summary-grid { + display: grid; + grid-template-columns: repeat(4, minmax(180px, 1fr)); + gap: 14px; + margin-bottom: 16px; + } + .runtime-summary-card { + padding: 16px 18px; + border-radius: 16px; + border: 1px solid rgba(148,163,184,0.18); + background: linear-gradient(180deg, rgba(255,255,255,0.96), rgba(241,245,249,0.88)); + } + .summary-label { + font-size: 12px; + color: #64748b; + margin-bottom: 8px; + } + .summary-value { + font-size: 28px; + line-height: 1; + font-weight: 700; + color: #0f172a; + margin-bottom: 8px; + } + .summary-hint { + font-size: 12px; + color: #475569; + } + 
.runtime-overview-panel { + padding: 16px 18px; + border-radius: 16px; + background: rgba(15, 23, 42, 0.03); + border: 1px solid rgba(148,163,184,0.14); + margin-bottom: 18px; + } + .runtime-status-row { + display: flex; + align-items: center; + gap: 10px; + flex-wrap: wrap; + margin-bottom: 10px; + } + .runtime-overview-text { + color: #0f172a; + font-size: 14px; + } + .runtime-overview-meta { + display: flex; + gap: 12px 18px; + flex-wrap: wrap; + color: #64748b; + font-size: 12px; + } + .runtime-error-box { + margin-top: 12px; + padding: 10px 12px; + border-radius: 10px; + background: rgba(239, 68, 68, 0.08); + color: #991b1b; + font-size: 12px; + line-height: 1.6; + } + .runtime-table-grid { + display: grid; + grid-template-columns: repeat(2, minmax(0, 1fr)); + gap: 16px; + } + .analytics-card { + border: 1px solid rgba(148,163,184,0.16); + border-radius: 16px; + } + .runtime-table-header h4 { + font-size: 16px; + margin-bottom: 4px; + color: #0f172a; + } + .runtime-table-header p { + color: #64748b; + font-size: 12px; + } .section-list { display: flex; flex-direction: column; gap: 12px; } .entry-card { border: 1px solid rgba(148,163,184,0.16); border-radius: 14px; } .entry-header { display: flex; align-items: center; justify-content: space-between; gap: 12px; } @@ -541,6 +885,8 @@ @media (max-width: 960px) { .page-hero { flex-direction: column; align-items: flex-start; } .workspace-header { flex-direction: column; align-items: flex-start; } + .runtime-summary-grid { grid-template-columns: 1fr; } + .runtime-table-grid { grid-template-columns: 1fr; } .entry-grid { grid-template-columns: 1fr; } .scene-row { grid-template-columns: 1fr; } } diff --git a/docs/工程优化与Feature清单.md b/docs/工程优化与Feature清单.md index 2b20d24..b105441 100644 --- a/docs/工程优化与Feature清单.md +++ b/docs/工程优化与Feature清单.md @@ -555,6 +555,12 @@ - 让 AI 能力更可控、更可衡量 +当前进展: + +- 第一阶段已完成:后台 `LLM目录配置` 页面已补充“AI运行分析”区块,可查看最近窗口内统一 LLM 调用的成功率、平均耗时、失败次数与最近错误 +- 第一阶段已完成:已支持按 `scene / backend / provider / model` 
四个维度聚合最近窗口调用数据,便于快速识别慢场景、异常后端与高失败模型
+- 当前仍以“最近窗口运行分析”为主,暂未引入持久化 token 成本结算;后续可在确认治理需求后继续扩展预算、告警与降级策略
+
 建议内容:
 
 - 统计各插件 token 消耗
diff --git a/utils/ai/unified_llm.py b/utils/ai/unified_llm.py
index 246aed1..3ebf745 100644
--- a/utils/ai/unified_llm.py
+++ b/utils/ai/unified_llm.py
@@ -109,6 +109,111 @@ class UnifiedLLMClient:
             "last_error": last_error,
         }
 
+    @staticmethod
+    def _normalize_runtime_dimension_value(value: Any, fallback_label: str = "(未标记)") -> str:
+        """Normalize a grouping-dimension value so dashboard tables never show a blank key.
+
+        An explicit "unlabeled" placeholder is kept for two reasons:
+        1. Admins can quickly see which plugin/scene did not pass scene, backend or model correctly;
+        2. It is safer than dropping empty values, so analytics rows are not silently swallowed.
+        """
+        text = str(value or "").strip()
+        return text or fallback_label
+
+    @classmethod
+    def _build_runtime_breakdown_rows(
+        cls,
+        rows: List[Dict[str, Any]],
+        dimension: str,
+        fallback_label: str = "(未标记)",
+    ) -> List[Dict[str, Any]]:
+        """Aggregate recent-window call records by the given dimension.
+
+        Design notes:
+        1. Only the in-memory recent-window data is aggregated; no new table and no persistent cost accounting;
+        2. The phase-one goal is to let admins see which kind of call is slower or fails more often;
+        3. Token/amount persistence can be added later once cost governance is confirmed to need it.
+        """
+        grouped_rows: Dict[str, Dict[str, Any]] = {}
+
+        for row in rows:
+            group_key = cls._normalize_runtime_dimension_value(row.get(dimension), fallback_label)
+            metric_row = grouped_rows.setdefault(
+                group_key,
+                {
+                    "key": group_key,
+                    "dimension": dimension,
+                    "total_calls": 0,
+                    "success_calls": 0,
+                    "failed_calls": 0,
+                    "latency_sum_ms": 0.0,
+                    "avg_latency_ms": 0.0,
+                    "success_rate": 0.0,
+                    "last_call_at": "",
+                    "last_trace_id": "",
+                    "last_error": "",
+                },
+            )
+
+            metric_row["total_calls"] += 1
+            if bool(row.get("success")):
+                metric_row["success_calls"] += 1
+            else:
+                metric_row["failed_calls"] += 1
+
+            metric_row["latency_sum_ms"] += float(row.get("latency_ms") or 0.0)
+
+            # Rows are expected in append (chronological) order, so a later row of the same group is the newer call.
+            # Overwriting the last-call fields is cheap and sufficient for the recent-window analytics table.
+            metric_row["last_call_at"] = str(row.get("timestamp") or "").strip()
+            metric_row["last_trace_id"] = str(row.get("trace_id") or "").strip()
+            if not bool(row.get("success")) and row.get("error"):
+                metric_row["last_error"] = str(row.get("error") or "").strip()
+
+        result_rows: List[Dict[str, Any]] = []
+        for item in grouped_rows.values():
+            total_calls = int(item.get("total_calls") or 0)
+            success_calls = int(item.get("success_calls") or 0)
+            item["avg_latency_ms"] = round((item.get("latency_sum_ms", 0.0) / total_calls), 2) if total_calls else 0.0
+            item["success_rate"] = round((success_calls / total_calls) * 100, 2) if total_calls else 0.0
+            item.pop("latency_sum_ms", None)
+            result_rows.append(item)
+
+        return sorted(
+            result_rows,
+            key=lambda item: (
+                -int(item.get("total_calls") or 0),
+                -int(item.get("failed_calls") or 0),
+                str(item.get("key") or ""),
+            ),
+        )
+
+    @classmethod
+    def get_runtime_breakdown(cls) -> Dict[str, Any]:
+        """Return the multi-dimension aggregated analysis of recent-window LLM calls.
+
+        The return structure serves phase one of the dashboard "AI cost and strategy center":
+        1. Aggregate around scene / backend / provider / model first;
+        2. Focus on success rate, average latency, failure count and the latest error;
+        3. No long-term retention is promised; it only serves recent-window runtime analysis.
+        """
+        with cls._runtime_lock:
+            # Copy every record while holding the lock: the returned "rows" must not alias
+            # the shared metrics window, otherwise a caller mutating the payload corrupts it.
+            rows = [dict(item) for item in cls._runtime_metrics]
+
+        # NOTE(review): the snapshot is taken after the lock is released, so its totals may
+        # differ slightly from `rows` if a call is recorded in between - confirm acceptable.
+        snapshot = cls.get_runtime_snapshot()
+        return {
+            **snapshot,
+            "rows": rows,
+            "by_scene": cls._build_runtime_breakdown_rows(rows, "scene"),
+            "by_backend": cls._build_runtime_breakdown_rows(rows, "backend"),
+            "by_provider": cls._build_runtime_breakdown_rows(rows, "provider"),
+            "by_model": cls._build_runtime_breakdown_rows(rows, "model"),
+        }
+
     def is_available(self) -> bool:
         if not self.enabled:
             return False