增强LLM运行分析与按维度统计视图
This commit is contained in:
@@ -395,6 +395,46 @@ def _extract_ai_runtime_snapshot() -> dict:
|
||||
return snapshot
|
||||
|
||||
|
||||
def _build_llm_runtime_analytics_payload() -> dict:
    """Build the LLM recent-window analytics payload.

    Why this lives in its own function:
    1. The home-page AI card only needs a summary, while the `system_llm`
       page needs finer-grained grouped tables;
    2. Both rely on the same runtime snapshot, which avoids scattering the
       scene/backend/provider/model aggregation logic across several
       endpoints;
    3. Phase one focuses on "recent-window analysis" so administrators can
       quickly spot slow scenes, failing models and abnormal backends.

    Returns:
        A dict with an ``overview`` sub-dict (counters, rates, status text
        and catalog figures) plus the ``recent_rows`` list and the
        ``by_scene`` / ``by_backend`` / ``by_provider`` / ``by_model``
        grouped lists taken from the runtime breakdown.
    """
    breakdown = UnifiedLLMClient.get_runtime_breakdown() or {}
    snapshot = _extract_ai_runtime_snapshot()
    catalog = _extract_llm_catalog_summary()

    def _clean_text(source, key, fallback=""):
        # Falsy values collapse to the fallback before being stringified.
        return str(source.get(key) or fallback).strip()

    # Counters and rates come straight from the recent-window breakdown.
    overview = {}
    for counter_key in ("window_size", "total_calls", "success_calls", "failed_calls"):
        overview[counter_key] = _safe_int(breakdown.get(counter_key))
    for rate_key in ("success_rate", "avg_latency_ms"):
        overview[rate_key] = _safe_float(breakdown.get(rate_key))
    overview["last_error"] = _clean_text(breakdown, "last_error")

    # Status wording is reused from the AI runtime snapshot.
    overview["status"] = _clean_text(snapshot, "status", "warning")
    overview["summary"] = _clean_text(snapshot, "summary")

    # Shallow copy so the response does not share the breakdown's own dict.
    overview["last_call"] = dict(breakdown.get("last_call") or {})

    # Catalog figures describe what is configured rather than what ran.
    for count_key in ("provider_count", "scene_count", "target_count"):
        overview[count_key] = _safe_int(catalog.get(count_key))
    for default_key in ("default_scene", "default_backend"):
        overview[default_key] = _clean_text(catalog, default_key)
    overview["has_routing"] = bool(catalog.get("has_routing"))

    payload = {
        "overview": overview,
        # Keep the raw recent-window rows so a future "last 10 calls" list
        # can reuse them directly.
        "recent_rows": breakdown.get("rows") or [],
    }
    for group_key in ("by_scene", "by_backend", "by_provider", "by_model"):
        payload[group_key] = breakdown.get(group_key) or []
    return payload
|
||||
|
||||
|
||||
def _extract_scheduler_runtime_snapshot() -> dict:
|
||||
"""聚合 async_job 运行态,生成首页任务调度摘要。
|
||||
|
||||
@@ -1115,6 +1155,26 @@ def get_system_llm_config():
|
||||
return jsonify({"success": False, "message": str(e)}), 500
|
||||
|
||||
|
||||
@system_bp.route('/api/system/llm_runtime_analytics', methods=['GET'])
@login_required
def get_system_llm_runtime_analytics():
    """Return the LLM recent-window analytics as a JSON response.

    This endpoint does not actively send probe requests and does no
    persisted cost settlement; it only consumes the recent-window metrics
    already recorded by the unified client:
    1. Refreshing the admin page must not put extra load on the AI service;
    2. First make "success rate and latency by scene/backend/model" solid;
    3. Keep the response shape ready for a future token cost center.

    Returns:
        ``{"success": True, "data": <payload>}`` on success. On any
        exception, ``{"success": False, "message": <error text>}`` together
        with HTTP status 500.
    """
    try:
        return jsonify({
            "success": True,
            "data": _build_llm_runtime_analytics_payload(),
        })
    except Exception as e:
        # Log at ERROR level *with* the traceback: this handler catches
        # everything, so the message alone is not enough to locate a failure
        # inside the payload builder.
        logger.exception(f"读取 LLM 运行分析失败: {e}")
        return jsonify({"success": False, "message": str(e)}), 500
|
||||
|
||||
|
||||
@system_bp.route('/api/system/llm_config', methods=['POST'])
|
||||
@login_required
|
||||
def update_system_llm_config():
|
||||
|
||||
Reference in New Issue
Block a user