From 8957799b760d33fca91da615a02853816f636d6d Mon Sep 17 00:00:00 2001 From: liuwei Date: Wed, 6 May 2026 08:43:18 +0800 Subject: [PATCH] =?UTF-8?q?=E5=9B=9E=E6=BB=9A=E6=9C=AC=E5=9C=B0LLM?= =?UTF-8?q?=E8=BF=90=E8=A1=8C=E5=88=86=E6=9E=90=E5=B9=B6=E5=AF=B9=E9=BD=90?= =?UTF-8?q?=E8=BF=9C=E7=AB=AF=E5=9B=9E=E6=BB=9A=E7=8A=B6=E6=80=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- admin/dashboard/blueprints/system.py | 632 +++------------------- admin/dashboard/templates/system_llm.html | 354 +----------- docs/工程优化与Feature清单.md | 6 - utils/ai/unified_llm.py | 101 ---- 4 files changed, 92 insertions(+), 1001 deletions(-) diff --git a/admin/dashboard/blueprints/system.py b/admin/dashboard/blueprints/system.py index 4b10997..37db1ff 100644 --- a/admin/dashboard/blueprints/system.py +++ b/admin/dashboard/blueprints/system.py @@ -16,7 +16,6 @@ from utils.markdown_to_image import get_md2img_health_snapshot, warmup_md2img_br from utils.ai.llm_registry import LLMRegistry from base.plugin_common.plugin_interface import PluginStatus from utils.ai.unified_llm import UnifiedLLMClient -from utils.decorator.async_job import async_job # 创建系统信息蓝图 system_bp = Blueprint('system', __name__) @@ -43,506 +42,6 @@ def _save_system_yaml(config_obj: dict) -> None: yaml.safe_dump(config_obj, f, allow_unicode=True, sort_keys=False) -def _safe_int(value, default: int = 0) -> int: - """把数据库 / Redis 返回的字符串数字安全转成整数。""" - try: - if value in (None, ""): - return default - return int(float(value)) - except (TypeError, ValueError): - return default - - -def _safe_float(value, default: float = 0.0) -> float: - """把数据库 / Redis 返回的值安全转成浮点数。""" - try: - if value in (None, ""): - return default - return float(value) - except (TypeError, ValueError): - return default - - -def _format_bytes_to_mb(value: int) -> float: - """把字节数转换为 MB,保留两位小数便于首页摘要展示。""" - return round((_safe_float(value, 0.0) / 1024 / 1024), 2) - - -def _extract_mysql_runtime_snapshot(db_manager) -> dict: - """采集 MySQL 运行态摘要。 - - 首页目标不是替代 DBA 工具,而是让管理员一眼判断: - 1. 数据库是不是活着; - 2. 当前连接压力高不高; - 3. 当前库规模是否已经明显变大; - 4. 有没有必要继续深入到更专业的监控页排查。 - """ - snapshot = { - "status": "healthy", - "summary": "连接正常", - "database": db_manager.get_mysql_database_name(), - "version": "", - "threads_connected": 0, - "threads_running": 0, - "max_connections": 0, - "connection_usage_percent": 0.0, - "questions_per_second": 0.0, - "uptime_seconds": 0, - "table_count": 0, - "schema_size_mb": 0.0, - "slow_query_threshold_ms": db_manager.get_slow_query_threshold_ms(), - } - - mysql_conn = db_manager.get_mysql_connection() - try: - with mysql_conn.cursor(dictionary=True) as cursor: - # 基础探活与版本识别: - # 1. SELECT VERSION() 成本极低; - # 2. 相比只做 SELECT 1,它还能顺便拿到版本信息; - # 3. 首页卡片里显示版本,方便线上排查“是不是某台库版本不一致”。 - cursor.execute("SELECT VERSION() AS version, DATABASE() AS database_name") - version_row = cursor.fetchone() or {} - snapshot["version"] = str(version_row.get("version") or "").strip() - snapshot["database"] = str(version_row.get("database_name") or snapshot["database"] or "").strip() - - cursor.execute( - """ - SHOW GLOBAL STATUS - WHERE Variable_name IN ('Threads_connected', 'Threads_running', 'Questions', 'Uptime') - """ - ) - status_rows = cursor.fetchall() or [] - status_map = { - str(row.get("Variable_name") or "").strip(): row.get("Value") - for row in status_rows - } - - cursor.execute( - """ - SHOW GLOBAL VARIABLES - WHERE Variable_name IN ('max_connections') - """ - ) - variable_rows = cursor.fetchall() or [] - variable_map = { - str(row.get("Variable_name") or "").strip(): row.get("Value") - for row in variable_rows - } - - # information_schema 聚合虽然比 SELECT 1 重一点,但仍属于轻量级元信息查询: - # 1. 只在首页 30 秒级刷新一次,成本可接受; - # 2. 能直接给出当前业务库表数量与体量变化; - # 3. 对判断“是不是消息表膨胀导致后台变慢”很有帮助。 - cursor.execute( - """ - SELECT - COUNT(*) AS table_count, - COALESCE(SUM(data_length + index_length), 0) AS schema_size_bytes - FROM information_schema.tables - WHERE table_schema = DATABASE() - """ - ) - schema_row = cursor.fetchone() or {} - - snapshot["threads_connected"] = _safe_int(status_map.get("Threads_connected")) - snapshot["threads_running"] = _safe_int(status_map.get("Threads_running")) - snapshot["max_connections"] = _safe_int(variable_map.get("max_connections")) - snapshot["uptime_seconds"] = _safe_int(status_map.get("Uptime")) - total_questions = _safe_int(status_map.get("Questions")) - if snapshot["uptime_seconds"] > 0: - snapshot["questions_per_second"] = round(total_questions / snapshot["uptime_seconds"], 2) - if snapshot["max_connections"] > 0: - snapshot["connection_usage_percent"] = round( - (snapshot["threads_connected"] / snapshot["max_connections"]) * 100, - 1, - ) - snapshot["table_count"] = _safe_int(schema_row.get("table_count")) - snapshot["schema_size_mb"] = _format_bytes_to_mb(schema_row.get("schema_size_bytes")) - - if snapshot["connection_usage_percent"] >= 80 or snapshot["threads_running"] >= 12: - snapshot["status"] = "warning" - snapshot["summary"] = ( - f"连接压力偏高:已连接 {snapshot['threads_connected']} / {snapshot['max_connections']}," - f"运行中线程 {snapshot['threads_running']}" - ) - else: - snapshot["summary"] = ( - f"连接正常:已连接 {snapshot['threads_connected']} / {snapshot['max_connections'] or '-'}," - f"QPS {snapshot['questions_per_second']}" - ) - return snapshot - except Exception as mysql_error: - snapshot["status"] = "danger" - snapshot["summary"] = f"MySQL 探测失败: {mysql_error}" - return snapshot - finally: - mysql_conn.close() - - -def _extract_redis_runtime_snapshot(db_manager) -> dict: - """采集 Redis 运行态摘要。""" - redis_config = getattr(db_manager, "redis_config", {}) or {} - snapshot = { - "status": "healthy", - "summary": "连接正常", - "db_index": _safe_int(redis_config.get("db", 0)), - "key_count": 0, - "connected_clients": 0, - "blocked_clients": 0, - "ops_per_sec": 0, - "used_memory_human": "", - "used_memory_peak_human": "", - "memory_usage_percent": 0.0, - "uptime_seconds": 0, - "hit_rate_percent": 0.0, - } - - try: - redis_conn = db_manager.get_redis_connection() - redis_conn.ping() - info = redis_conn.info() or {} - snapshot["key_count"] = _safe_int(redis_conn.dbsize()) - snapshot["connected_clients"] = _safe_int(info.get("connected_clients")) - snapshot["blocked_clients"] = _safe_int(info.get("blocked_clients")) - snapshot["ops_per_sec"] = _safe_int(info.get("instantaneous_ops_per_sec")) - snapshot["used_memory_human"] = str(info.get("used_memory_human") or "").strip() - snapshot["used_memory_peak_human"] = str(info.get("used_memory_peak_human") or "").strip() - snapshot["uptime_seconds"] = _safe_int(info.get("uptime_in_seconds")) - - maxmemory = _safe_int(info.get("maxmemory")) - used_memory = _safe_int(info.get("used_memory")) - if maxmemory > 0: - snapshot["memory_usage_percent"] = round((used_memory / maxmemory) * 100, 1) - - keyspace_hits = _safe_int(info.get("keyspace_hits")) - keyspace_misses = _safe_int(info.get("keyspace_misses")) - if (keyspace_hits + keyspace_misses) > 0: - snapshot["hit_rate_percent"] = round( - (keyspace_hits / (keyspace_hits + keyspace_misses)) * 100, - 1, - ) - - if snapshot["blocked_clients"] > 0 or snapshot["memory_usage_percent"] >= 80: - snapshot["status"] = "warning" - snapshot["summary"] = ( - f"缓存压力需关注:keys {snapshot['key_count']}," - f"clients {snapshot['connected_clients']},ops/s {snapshot['ops_per_sec']}" - ) - else: - snapshot["summary"] = ( - f"缓存正常:keys {snapshot['key_count']}," - f"clients {snapshot['connected_clients']},ops/s {snapshot['ops_per_sec']}" - ) - return snapshot - except Exception as redis_error: - snapshot["status"] = "danger" - snapshot["summary"] = f"Redis 探测失败: {redis_error}" - return snapshot - - -def _parse_snapshot_datetime(value: str | None) -> datetime | None: - """把首页摘要里常用的时间字符串安全转换为 datetime。""" - text = str(value or "").strip() - if not text: - return None - try: - return datetime.strptime(text, "%Y-%m-%d %H:%M:%S") - except ValueError: - return None - - -def _count_enabled_runtime_items(items) -> int: - """统计启用项数量。 - - 兼容原因: - 1. 新版目录模型里 providers/backends/scenes 可能是 dict; - 2. 后台页面某些兜底逻辑里也可能给出 list; - 3. 旧配置没有 enabled 字段时,直接按存在即计数。 - """ - rows = [] - if isinstance(items, dict): - rows = list(items.values()) - elif isinstance(items, list): - rows = list(items) - count = 0 - for row in rows: - if not isinstance(row, dict): - continue - if "enabled" not in row or bool(row.get("enabled", True)): - count += 1 - return count - - -def _extract_llm_catalog_summary() -> dict: - """提取首页 LLM 路由配置摘要。 - - 这里不做真实调用探测,只回答两个问题: - 1. 运行时有没有可用的场景与目标; - 2. 管理员当前看到的调用记录,大致落到了哪一套路由上。 - """ - try: - catalog = LLMRegistry.get_catalog() or {} - if catalog: - providers = catalog.get("providers", {}) or {} - dify_apps = catalog.get("dify_apps", {}) or {} - backends = catalog.get("backends", {}) or {} - scenes = catalog.get("scenes", {}) or {} - default_scene = str(catalog.get("default_scene") or "").strip() - default_backend = str(LLMRegistry.get_scene_backend_name(default_scene) or "").strip() if default_scene else "" - return { - "provider_count": _count_enabled_runtime_items(providers), - "scene_count": _count_enabled_runtime_items(scenes), - "target_count": _count_enabled_runtime_items(backends) + _count_enabled_runtime_items(dify_apps), - "default_scene": default_scene, - "default_backend": default_backend, - "has_routing": _count_enabled_runtime_items(scenes) > 0, - } - - # 目录模型不存在时回退到 legacy 视图,至少让首页知道“有没有基础路由配置”。 - legacy_llm = LLMRegistry.get_llm_config() or {} - scenes = legacy_llm.get("scenes", {}) or {} - backends = legacy_llm.get("backends", {}) or {} - default_backend = str(legacy_llm.get("default_backend") or "").strip() - return { - "provider_count": 0, - "scene_count": len(scenes) if isinstance(scenes, dict) else 0, - "target_count": len(backends) if isinstance(backends, dict) else 0, - "default_scene": "", - "default_backend": default_backend, - "has_routing": bool(scenes) or bool(default_backend), - } - except Exception as llm_catalog_error: - logger.warning(f"提取 LLM 路由摘要失败: {llm_catalog_error}") - return { - "provider_count": 0, - "scene_count": 0, - "target_count": 0, - "default_scene": "", - "default_backend": "", - "has_routing": False, - } - - -def _extract_ai_runtime_snapshot() -> dict: - """构建首页 LLM 运行态摘要。 - - 设计原则: - 1. 首页只展示“最近调用窗口”的被动观测结果,不主动发请求探活; - 2. 把最近调用和静态路由配置拼在一起,避免管理员只看到“成功/失败”却不知道走的是哪条链路; - 3. 如果近期没有调用,也明确区分“未配置”和“已配置但当前空闲”。 - """ - runtime_snapshot = UnifiedLLMClient.get_runtime_snapshot() or {} - last_call = dict(runtime_snapshot.get("last_call") or {}) - catalog_summary = _extract_llm_catalog_summary() - - total_calls = _safe_int(runtime_snapshot.get("total_calls")) - failed_calls = _safe_int(runtime_snapshot.get("failed_calls")) - success_rate = _safe_float(runtime_snapshot.get("success_rate")) - avg_latency_ms = _safe_float(runtime_snapshot.get("avg_latency_ms")) - last_error = str(runtime_snapshot.get("last_error") or "").strip() - - snapshot = { - **runtime_snapshot, - "last_call": last_call, - "provider_count": catalog_summary.get("provider_count", 0), - "scene_count": catalog_summary.get("scene_count", 0), - "target_count": catalog_summary.get("target_count", 0), - "default_scene": catalog_summary.get("default_scene", ""), - "default_backend": catalog_summary.get("default_backend", ""), - "has_routing": bool(catalog_summary.get("has_routing")), - "last_provider": str(last_call.get("provider") or "").strip(), - "last_backend": str(last_call.get("backend") or "").strip(), - "last_scene": str(last_call.get("scene") or "").strip(), - "last_model": str(last_call.get("model") or "").strip(), - "last_timestamp": str(last_call.get("timestamp") or "").strip(), - "last_latency_ms": _safe_float(last_call.get("latency_ms")), - } - - if not snapshot["has_routing"]: - snapshot["status"] = "warning" - snapshot["summary"] = "当前未发现完整的 LLM 路由配置,建议先检查默认场景与后端绑定" - return snapshot - - if total_calls <= 0: - snapshot["status"] = "warning" - snapshot["summary"] = ( - f"已配置 {snapshot['scene_count']} 个场景、{snapshot['target_count']} 个目标," - "最近窗口内暂无统一 LLM 调用记录" - ) - return snapshot - - if failed_calls >= total_calls and total_calls > 0: - snapshot["status"] = "danger" - snapshot["summary"] = ( - f"最近 {total_calls} 次调用全部失败,成功率 {success_rate:.2f}%," - f"平均耗时 {avg_latency_ms:.2f}ms" - ) - return snapshot - - if failed_calls > 0 or last_error: - snapshot["status"] = "warning" - snapshot["summary"] = ( - f"最近 {total_calls} 次调用中失败 {failed_calls} 次,成功率 {success_rate:.2f}%," - f"平均耗时 {avg_latency_ms:.2f}ms" - ) - return snapshot - - snapshot["status"] = "healthy" - snapshot["summary"] = ( - f"最近 {total_calls} 次调用全部成功,成功率 {success_rate:.2f}%," - f"平均耗时 {avg_latency_ms:.2f}ms" - ) - return snapshot - - -def _build_llm_runtime_analytics_payload() -> dict: - """构建 LLM 最近窗口分析载荷。 - - 为什么单独抽这个函数: - 1. 首页 AI 卡片只看摘要,而 `system_llm` 页面需要更细粒度的分组表; - 2. 两边都依赖同一套运行时快照,避免把 scene/backend/provider/model 聚合逻辑散在多个接口里; - 3. 第一阶段先做“最近窗口分析”,让管理员快速识别慢场景、失败模型和异常后端。 - """ - runtime_breakdown = UnifiedLLMClient.get_runtime_breakdown() or {} - overview_snapshot = _extract_ai_runtime_snapshot() - catalog_summary = _extract_llm_catalog_summary() - - return { - "overview": { - "window_size": _safe_int(runtime_breakdown.get("window_size")), - "total_calls": _safe_int(runtime_breakdown.get("total_calls")), - "success_calls": _safe_int(runtime_breakdown.get("success_calls")), - "failed_calls": _safe_int(runtime_breakdown.get("failed_calls")), - "success_rate": _safe_float(runtime_breakdown.get("success_rate")), - "avg_latency_ms": _safe_float(runtime_breakdown.get("avg_latency_ms")), - "last_error": str(runtime_breakdown.get("last_error") or "").strip(), - "status": str(overview_snapshot.get("status") or "warning").strip(), - "summary": str(overview_snapshot.get("summary") or "").strip(), - "last_call": dict(runtime_breakdown.get("last_call") or {}), - "provider_count": _safe_int(catalog_summary.get("provider_count")), - "scene_count": _safe_int(catalog_summary.get("scene_count")), - "target_count": _safe_int(catalog_summary.get("target_count")), - "default_scene": str(catalog_summary.get("default_scene") or "").strip(), - "default_backend": str(catalog_summary.get("default_backend") or "").strip(), - "has_routing": bool(catalog_summary.get("has_routing")), - }, - # 这里保留原始最近窗口明细,方便后续如果要做“最近 10 次调用”列表时直接复用。 - "recent_rows": runtime_breakdown.get("rows", []) or [], - "by_scene": runtime_breakdown.get("by_scene", []) or [], - "by_backend": runtime_breakdown.get("by_backend", []) or [], - "by_provider": runtime_breakdown.get("by_provider", []) or [], - "by_model": runtime_breakdown.get("by_model", []) or [], - } - - -def _extract_scheduler_runtime_snapshot() -> dict: - """聚合 async_job 运行态,生成首页任务调度摘要。 - - 这里的目标不是替代完整任务页,而是回答管理员最常问的几件事: - 1. 任务有没有正常装载; - 2. 是否存在失败或非法调度; - 3. 下一次任务大概何时执行; - 4. 当前更多是系统任务,还是插件任务在跑。 - """ - runtime_rows = async_job.get_jobs_snapshot() - next_run_candidates = [] - failed_rows = [] - system_job_count = 0 - plugin_job_count = 0 - - for row in runtime_rows: - job_key = str(row.get("job_key") or "").strip() - owner_name = str(row.get("owner_name") or "system").strip().lower() - next_run_at = _parse_snapshot_datetime(row.get("next_run_at")) - last_status = str(row.get("last_status") or "").strip().lower() - - if job_key.startswith("plugin_schedule:") or owner_name != "system": - plugin_job_count += 1 - else: - system_job_count += 1 - - if bool(row.get("enabled")) and next_run_at: - next_run_candidates.append(next_run_at) - if last_status in {"failed", "invalid_schedule"}: - failed_rows.append(row) - - latest_failed_row = {} - if failed_rows: - failed_rows.sort( - key=lambda row: ( - _parse_snapshot_datetime(row.get("updated_at")) - or _parse_snapshot_datetime(row.get("last_run_at")) - or datetime.min - ), - reverse=True, - ) - latest_failed_row = failed_rows[0] - - invalid_jobs = sum( - 1 for row in runtime_rows if str(row.get("last_status") or "").strip().lower() == "invalid_schedule" - ) - total_jobs = len(runtime_rows) - enabled_jobs = sum(1 for row in runtime_rows if bool(row.get("enabled"))) - running_jobs = sum(1 for row in runtime_rows if bool(row.get("running"))) - failed_jobs = len(failed_rows) - paused_jobs = total_jobs - enabled_jobs - never_run_jobs = sum(1 for row in runtime_rows if str(row.get("last_status") or "").strip().lower() == "never") - next_run_at_text = min(next_run_candidates).strftime("%Y-%m-%d %H:%M:%S") if next_run_candidates else "" - latest_failed_error = str(latest_failed_row.get("last_error") or "").strip() - if len(latest_failed_error) > 120: - latest_failed_error = f"{latest_failed_error[:117]}..." - - snapshot = { - "status": "healthy", - "summary": "任务调度运行正常", - "total_jobs": total_jobs, - "enabled_jobs": enabled_jobs, - "running_jobs": running_jobs, - "failed_jobs": failed_jobs, - "invalid_jobs": invalid_jobs, - "paused_jobs": paused_jobs, - "never_run_jobs": never_run_jobs, - "system_job_count": system_job_count, - "plugin_job_count": plugin_job_count, - "next_run_at": next_run_at_text, - "latest_failed_job_name": str(latest_failed_row.get("name") or "").strip(), - "latest_failed_error": latest_failed_error, - } - - if total_jobs <= 0: - snapshot["status"] = "warning" - snapshot["summary"] = "当前没有加载任何定时任务" - return snapshot - - if invalid_jobs > 0: - snapshot["status"] = "danger" - snapshot["summary"] = f"发现 {invalid_jobs} 个任务调度配置非法,建议立即检查任务页" - return snapshot - - if failed_jobs > 0: - snapshot["status"] = "warning" - snapshot["summary"] = ( - f"最近有 {failed_jobs} 个任务执行失败," - f"下一次执行 {next_run_at_text or '暂未计算'}" - ) - return snapshot - - if enabled_jobs <= 0: - snapshot["status"] = "warning" - snapshot["summary"] = "任务已加载,但当前没有启用中的调度任务" - return snapshot - - if running_jobs > 0: - snapshot["summary"] = ( - f"当前有 {running_jobs} 个任务执行中," - f"下一次执行 {next_run_at_text or '暂未计算'}" - ) - return snapshot - - snapshot["summary"] = f"已启用 {enabled_jobs} 个任务,下一次执行 {next_run_at_text or '暂未计算'}" - return snapshot - - def _legacy_llm_to_catalog(legacy_llm: dict) -> dict: """把旧 llm(backends/scenes) 结构转换为新目录结构(仅用于兜底展示)。 @@ -906,11 +405,45 @@ def api_system_health_summary(): _, recent_error_count = server.stats_db.get_error_logs(days=1, page=1, limit=1) # 基础设施健康: - # 1. MySQL / Redis 都在这里做“首页摘要级”探测,而不是完整深度巡检; - # 2. 除了连通性,还补充少量负载指标,方便管理员快速判断是否需要继续下钻; + # 1. MySQL 用最轻量的 SELECT 1 做可用性探测; + # 2. Redis 用 PING 验证连接池当前是否可拿到可用连接; # 3. 即使探测失败也只反馈到看板,不影响主接口整体返回。 - mysql_snapshot = _extract_mysql_runtime_snapshot(server.db_manager) - redis_snapshot = _extract_redis_runtime_snapshot(server.db_manager) + mysql_status = "healthy" + mysql_summary = "连接正常" + try: + mysql_conn = server.db_manager.get_mysql_connection() + try: + with mysql_conn.cursor() as cursor: + cursor.execute("SELECT 1") + cursor.fetchone() + finally: + mysql_conn.close() + except Exception as mysql_error: + mysql_status = "danger" + mysql_summary = f"MySQL 探测失败: {mysql_error}" + + redis_status = "healthy" + redis_summary = "连接正常" + try: + redis_conn = server.db_manager.get_redis_connection() + redis_conn.ping() + except Exception as redis_error: + redis_status = "danger" + redis_summary = f"Redis 探测失败: {redis_error}" + + # md2img 健康快照已经有现成实现,这里只做聚合,不主动预热运行时。 + md2img_snapshot = get_md2img_health_snapshot(ensure_runtime=False) or {} + browser_ready = bool( + md2img_snapshot.get("browser_ready") + or md2img_snapshot.get("playwright_ready") + or md2img_snapshot.get("ready") + ) + runtime_ready = bool( + md2img_snapshot.get("runtime_ready") + or md2img_snapshot.get("runtime_initialized") + or md2img_snapshot.get("initialized") + ) + md2img_healthy = runtime_ready and browser_ready # 首页只需要“够判断”的轻量结论,因此统一产出 status + summary 文本,前端无需重复拼装业务规则。 robot_running = bool(getattr(robot, "ipad_running", False)) @@ -937,11 +470,37 @@ def api_system_health_summary(): error_status = "healthy" error_summary = "近 24 小时未记录到异常" - # 首页 AI 卡片升级为“运行态 + 路由摘要”,仍然保持被动观测,不主动探活。 - ai_runtime = _extract_ai_runtime_snapshot() + if md2img_healthy: + md2img_status = "healthy" + md2img_summary = "运行时与浏览器均已就绪" + elif runtime_ready or browser_ready: + md2img_status = "warning" + md2img_summary = "运行时部分可用,建议检查预热状态" + else: + md2img_status = "danger" + md2img_summary = "运行时未就绪,相关转图能力可能不可用" - # Markdown 转图更适合保留在专门页面里排障,首页右侧改成更通用的任务调度摘要。 - scheduler_runtime = _extract_scheduler_runtime_snapshot() + # AI 运行态: + # 1. 统一从 UnifiedLLMClient 最近调用窗口读取,避免各插件单独维护监控数据; + # 2. 若当前窗口还没有调用记录,就明确返回“暂无调用”,避免误判成异常。 + ai_runtime = UnifiedLLMClient.get_runtime_snapshot() + ai_total_calls = int(ai_runtime.get("total_calls") or 0) + ai_failed_calls = int(ai_runtime.get("failed_calls") or 0) + if ai_total_calls <= 0: + ai_status = "warning" + ai_summary = "最近窗口内暂无统一 LLM 调用记录" + elif ai_failed_calls > 0: + ai_status = "warning" + ai_summary = ( + f"最近 {ai_total_calls} 次调用中失败 {ai_failed_calls} 次," + f"平均耗时 {ai_runtime.get('avg_latency_ms', 0)}ms" + ) + else: + ai_status = "healthy" + ai_summary = ( + f"最近 {ai_total_calls} 次调用全部成功," + f"平均耗时 {ai_runtime.get('avg_latency_ms', 0)}ms" + ) return jsonify({ "success": True, @@ -965,28 +524,33 @@ def api_system_health_summary(): "summary": error_summary, }, "infrastructure": { - "status": ( - "danger" - if "danger" in {mysql_snapshot.get("status"), redis_snapshot.get("status")} - else ("warning" if "warning" in {mysql_snapshot.get("status"), redis_snapshot.get("status")} else "healthy") - ), + "status": "healthy" if mysql_status == "healthy" and redis_status == "healthy" else "danger", "summary": ( "MySQL / Redis 均正常" - if mysql_snapshot.get("status") == "healthy" and redis_snapshot.get("status") == "healthy" - else ( - "基础设施连接正常,但部分负载指标需要关注" - if mysql_snapshot.get("status") != "danger" and redis_snapshot.get("status") != "danger" - else "存在基础设施连接异常" - ) + if mysql_status == "healthy" and redis_status == "healthy" + else "存在基础设施连接异常" ), - "mysql": mysql_snapshot, - "redis": redis_snapshot, + "mysql": { + "status": mysql_status, + "summary": mysql_summary, + }, + "redis": { + "status": redis_status, + "summary": redis_summary, + }, }, "ai_runtime": { + "status": ai_status, + "summary": ai_summary, **ai_runtime, }, - "scheduler": { - **scheduler_runtime, + "md2img": { + "status": md2img_status, + "healthy": md2img_healthy, + "runtime_ready": runtime_ready, + "browser_ready": browser_ready, + "summary": md2img_summary, + "detail": md2img_snapshot, }, } }) @@ -1155,26 +719,6 @@ def get_system_llm_config(): return jsonify({"success": False, "message": str(e)}), 500 -@system_bp.route('/api/system/llm_runtime_analytics', methods=['GET']) -@login_required -def get_system_llm_runtime_analytics(): - """返回 LLM 最近窗口分析结果。 - - 这里不主动发起探活请求,也不做持久化成本结算,只消费统一客户端已经记录的最近窗口埋点: - 1. 避免后台刷新页面反过来给 AI 服务制造额外压力; - 2. 先把“按场景/后端/模型看成功率与耗时”做扎实; - 3. 为后续真正的 token 成本中心预留接口形态。 - """ - try: - return jsonify({ - "success": True, - "data": _build_llm_runtime_analytics_payload(), - }) - except Exception as e: - logger.error(f"读取 LLM 运行分析失败: {e}") - return jsonify({"success": False, "message": str(e)}), 500 - - @system_bp.route('/api/system/llm_config', methods=['POST']) @login_required def update_system_llm_config(): diff --git a/admin/dashboard/templates/system_llm.html b/admin/dashboard/templates/system_llm.html index 53dc82a..3604102 100644 --- a/admin/dashboard/templates/system_llm.html +++ b/admin/dashboard/templates/system_llm.html @@ -8,181 +8,14 @@
LLM Catalog

LLM目录配置

-

按 Provider 模板、Dify 应用、Scene 绑定三层维护,并结合最近窗口运行分析判断哪条 AI 路由更慢、更容易失败。

+

按 Provider 模板、Dify 应用、Scene 绑定三层维护,减少重复配置和切换成本。

- 刷新 + 刷新 保存配置
- -
-
-

AI运行分析

-

基于统一 LLM 客户端最近窗口埋点做被动观测,不额外发起探活请求。

-
-
- 窗口容量:{% raw %}{{ runtimeAnalytics.overview.window_size || 0 }}{% endraw %} - 默认场景:{% raw %}{{ runtimeAnalytics.overview.default_scene || '-' }}{% endraw %} - 默认目标:{% raw %}{{ runtimeAnalytics.overview.default_backend || '-' }}{% endraw %} -
-
- -
-
-
最近调用
-
{% raw %}{{ runtimeAnalytics.overview.total_calls || 0 }}{% endraw %}
-
成功 {% raw %}{{ runtimeAnalytics.overview.success_calls || 0 }}{% endraw %} / 失败 {% raw %}{{ runtimeAnalytics.overview.failed_calls || 0 }}{% endraw %}
-
-
-
成功率
-
{% raw %}{{ formatPercent(runtimeAnalytics.overview.success_rate) }}{% endraw %}
-
按最近窗口实时汇总
-
-
-
平均耗时
-
{% raw %}{{ formatLatency(runtimeAnalytics.overview.avg_latency_ms) }}{% endraw %}
-
用于快速识别慢场景
-
-
-
路由规模
-
{% raw %}{{ runtimeAnalytics.overview.scene_count || 0 }}{% endraw %}
-
场景数 / 目标数 {% raw %}{{ runtimeAnalytics.overview.target_count || 0 }}{% endraw %}
-
-
- -
-
- - {% raw %}{{ statusText(runtimeAnalytics.overview.status) }}{% endraw %} - - {% raw %}{{ runtimeAnalytics.overview.summary || '最近窗口内暂无统一 LLM 调用记录' }}{% endraw %} -
-
- Provider 模板:{% raw %}{{ runtimeAnalytics.overview.provider_count || 0 }}{% endraw %} - 最近场景:{% raw %}{{ runtimeAnalytics.overview.last_call.scene || '-' }}{% endraw %} - 最近后端:{% raw %}{{ runtimeAnalytics.overview.last_call.backend || '-' }}{% endraw %} - 最近模型:{% raw %}{{ runtimeAnalytics.overview.last_call.model || '-' }}{% endraw %} - 最近时间:{% raw %}{{ runtimeAnalytics.overview.last_call.timestamp || '-' }}{% endraw %} -
-
- 最近错误: - {% raw %}{{ runtimeAnalytics.overview.last_error }}{% endraw %} -
-
- -
- -
-
-

按场景统计

-

定位哪个业务场景最常调用、最容易失败。

-
-
- - - - - - - - - - - - - - -
- - -
-
-

按后端统计

-

观察 backend 层是否存在集中失败或慢请求。

-
-
- - - - - - - - - - - - - - -
- - -
-
-

按 Provider 统计

-

区分 Dify 与 OpenAI Compatible 等不同接入形态的表现。

-
-
- - - - - - - - - - - - - - -
- - -
-
-

按模型统计

-

帮助判断是否需要按场景切换模型或做降级策略。

-
-
- - - - - - - - - - - - - - -
-
-
-
@@ -382,30 +215,6 @@ currentView: '17', configPath: '', topologyRows: [], - runtimeAnalyticsLoading: false, - runtimeAnalytics: { - overview: { - window_size: 0, - total_calls: 0, - success_calls: 0, - failed_calls: 0, - success_rate: 0, - avg_latency_ms: 0, - last_error: '', - status: 'warning', - summary: '', - last_call: {}, - provider_count: 0, - scene_count: 0, - target_count: 0, - default_scene: '', - default_backend: '' - }, - by_scene: [], - by_backend: [], - by_provider: [], - by_model: [] - }, catalog: { default_scene: '', providers: [], @@ -431,45 +240,12 @@ }, mounted() { this.currentView = '17'; - this.reloadPageData(); + this.loadLlmConfig(); }, methods: { newUid() { return `${Date.now()}_${Math.random().toString(36).slice(2, 8)}`; }, - // 统一刷新配置与运行分析,避免管理员点一次“刷新”只能看到半套信息。 - async reloadPageData() { - await Promise.all([ - this.loadLlmConfig(), - this.loadRuntimeAnalytics() - ]); - }, - statusTagType(status) { - if (status === 'healthy') { - return 'success'; - } - if (status === 'danger') { - return 'danger'; - } - return 'warning'; - }, - statusText(status) { - if (status === 'healthy') { - return '运行正常'; - } - if (status === 'danger') { - return '需要立即处理'; - } - return '需要关注'; - }, - formatPercent(value) { - const numeric = Number(value || 0); - return `${numeric.toFixed(2)}%`; - }, - formatLatency(value) { - const numeric = Number(value || 0); - return `${numeric.toFixed(2)} ms`; - }, // Provider 模板:只放公共字段,避免 Dify 每个应用重复填写。 newProvider() { return { @@ -620,46 +396,6 @@ } return this.difyAppNameOptions; }, - async loadRuntimeAnalytics() { - this.runtimeAnalyticsLoading = true; - try { - const response = await axios.get('/api/system/llm_runtime_analytics'); - if (!response.data.success) { - this.$message.error(response.data.message || '读取 AI 运行分析失败'); - return; - } - const data = response.data.data || {}; - const overview = data.overview || {}; - // 这里做前端兜底结构归一化,避免后端未来新增字段时影响当前页面渲染。 - this.runtimeAnalytics = { - overview: { - window_size: overview.window_size || 0, - total_calls: overview.total_calls || 0, - success_calls: overview.success_calls || 0, - failed_calls: overview.failed_calls || 0, - success_rate: overview.success_rate || 0, - avg_latency_ms: overview.avg_latency_ms || 0, - last_error: overview.last_error || '', - status: overview.status || 'warning', - summary: overview.summary || '', - last_call: overview.last_call || {}, - provider_count: overview.provider_count || 0, - scene_count: overview.scene_count || 0, - target_count: overview.target_count || 0, - default_scene: overview.default_scene || '', - default_backend: overview.default_backend || '' - }, - by_scene: data.by_scene || [], - by_backend: data.by_backend || [], - by_provider: data.by_provider || [], - by_model: data.by_model || [] - }; - } catch (error) { - this.$message.error(error.response?.data?.message || '读取 AI 运行分析失败'); - } finally { - this.runtimeAnalyticsLoading = false; - } - }, async loadLlmConfig() { try { const response = await axios.get('/api/system/llm_config'); @@ -738,7 +474,7 @@ const response = await axios.post('/api/system/llm_config', payload); if (response.data.success) { this.$message.success(response.data.message || '保存成功'); - this.reloadPageData(); + this.loadLlmConfig(); } else { this.$message.error(response.data.message || '保存失败'); } @@ -777,86 +513,6 @@ gap: 8px; flex-wrap: wrap; } - .runtime-summary-grid { - display: grid; - grid-template-columns: repeat(4, minmax(180px, 1fr)); - gap: 14px; - margin-bottom: 16px; - } - .runtime-summary-card { - padding: 16px 18px; - border-radius: 16px; - border: 1px solid rgba(148,163,184,0.18); - background: linear-gradient(180deg, rgba(255,255,255,0.96), rgba(241,245,249,0.88)); - } - .summary-label { - font-size: 12px; - color: #64748b; - margin-bottom: 8px; - } - .summary-value { - font-size: 28px; - line-height: 1; - font-weight: 700; - color: #0f172a; - margin-bottom: 8px; - } - .summary-hint { - font-size: 12px; - color: #475569; - } - .runtime-overview-panel { - padding: 16px 18px; - border-radius: 16px; - background: rgba(15, 23, 42, 0.03); - border: 1px solid rgba(148,163,184,0.14); - margin-bottom: 18px; - } - .runtime-status-row { - display: flex; - align-items: center; - gap: 10px; - flex-wrap: wrap; - margin-bottom: 10px; - } - .runtime-overview-text { - color: #0f172a; - font-size: 14px; - } - .runtime-overview-meta { - display: flex; - gap: 12px 18px; - flex-wrap: wrap; - color: #64748b; - font-size: 12px; - } - .runtime-error-box { - margin-top: 12px; - padding: 10px 12px; - border-radius: 10px; - background: rgba(239, 68, 68, 0.08); - color: #991b1b; - font-size: 12px; - line-height: 1.6; - } - .runtime-table-grid { - display: grid; - grid-template-columns: repeat(2, minmax(0, 1fr)); - gap: 16px; - } - .analytics-card { - border: 1px solid rgba(148,163,184,0.16); - border-radius: 16px; - } - .runtime-table-header h4 { - font-size: 16px; - margin-bottom: 4px; - color: #0f172a; - } - .runtime-table-header p { - color: #64748b; - font-size: 12px; - } .section-list { display: flex; flex-direction: column; gap: 12px; } .entry-card { border: 1px solid rgba(148,163,184,0.16); border-radius: 14px; } .entry-header { display: flex; align-items: center; justify-content: space-between; gap: 12px; } @@ -885,8 +541,6 @@ @media (max-width: 960px) { .page-hero { flex-direction: column; align-items: flex-start; } .workspace-header { flex-direction: column; align-items: flex-start; } - .runtime-summary-grid { grid-template-columns: 1fr; } - .runtime-table-grid { grid-template-columns: 1fr; } .entry-grid { grid-template-columns: 1fr; } .scene-row { grid-template-columns: 1fr; } } diff --git a/docs/工程优化与Feature清单.md b/docs/工程优化与Feature清单.md index 0478d49..c4e8701 100644 --- a/docs/工程优化与Feature清单.md +++ b/docs/工程优化与Feature清单.md @@ -524,12 +524,6 @@ - 让 AI 能力更可控、更可衡量 -当前进展: - -- 第一阶段已完成:后台 `LLM目录配置` 页面已补充“AI运行分析”区块,可查看最近窗口内统一 LLM 调用的成功率、平均耗时、失败次数与最近错误 -- 第一阶段已完成:已支持按 `scene / backend / provider / model` 四个维度聚合最近窗口调用数据,便于快速识别慢场景、异常后端与高失败模型 -- 当前仍以“最近窗口运行分析”为主,暂未引入持久化 token 成本结算;后续可在确认治理需求后继续扩展预算、告警与降级策略 - 建议内容: - 统计各插件 token 消耗 diff --git a/utils/ai/unified_llm.py b/utils/ai/unified_llm.py index 3ebf745..246aed1 100644 --- a/utils/ai/unified_llm.py +++ b/utils/ai/unified_llm.py @@ -109,107 +109,6 @@ class UnifiedLLMClient: "last_error": last_error, } - @staticmethod - def _normalize_runtime_dimension_value(value: Any, fallback_label: str = "(未标记)") -> str: - """把分组维度统一格式化,避免后台表格里出现空白 key。 - - 这里保留一个显式的“未标记”占位,有两个目的: - 1. 便于管理员快速发现是哪个插件/场景没有正确传 scene、backend、model; - 2. 比直接丢弃空值更安全,避免分析数据被“悄悄吃掉”。 - """ - text = str(value or "").strip() - return text or fallback_label - - @classmethod - def _build_runtime_breakdown_rows( - cls, - rows: List[Dict[str, Any]], - dimension: str, - fallback_label: str = "(未标记)", - ) -> List[Dict[str, Any]]: - """按指定维度聚合最近窗口调用记录。 - - 设计说明: - 1. 这里只聚合最近窗口内存数据,不引入新表,也不做持久化成本结算; - 2. 第一阶段目标是先让管理员看见“哪类调用更慢、更容易失败”; - 3. 等后续确认成本治理真的需要时,再把 token/金额沉淀到持久化表里。 - """ - grouped_rows: Dict[str, Dict[str, Any]] = {} - - for row in rows: - group_key = cls._normalize_runtime_dimension_value(row.get(dimension), fallback_label) - metric_row = grouped_rows.setdefault( - group_key, - { - "key": group_key, - "dimension": dimension, - "total_calls": 0, - "success_calls": 0, - "failed_calls": 0, - "latency_sum_ms": 0.0, - "avg_latency_ms": 0.0, - "success_rate": 0.0, - "last_call_at": "", - "last_trace_id": "", - "last_error": "", - }, - ) - - metric_row["total_calls"] += 1 - if bool(row.get("success")): - metric_row["success_calls"] += 1 - else: - metric_row["failed_calls"] += 1 - - metric_row["latency_sum_ms"] += float(row.get("latency_ms") or 0.0) - - # deque 本身按时间顺序追加,因此后遍历到的同组记录就是更“新”的一次调用。 - # 这里直接覆盖最近调用信息,成本低,也足够支撑后台最近窗口分析表。 - metric_row["last_call_at"] = str(row.get("timestamp") or "").strip() - metric_row["last_trace_id"] = str(row.get("trace_id") or "").strip() - if not bool(row.get("success")) and row.get("error"): - metric_row["last_error"] = str(row.get("error") or "").strip() - - result_rows: List[Dict[str, Any]] = [] - for item in grouped_rows.values(): - total_calls = int(item.get("total_calls") or 0) - success_calls = int(item.get("success_calls") or 0) - item["avg_latency_ms"] = round((item.get("latency_sum_ms", 0.0) / total_calls), 2) if total_calls else 0.0 - item["success_rate"] = round((success_calls / total_calls) * 100, 2) if total_calls else 0.0 - item.pop("latency_sum_ms", None) - result_rows.append(item) - - return sorted( - result_rows, - key=lambda item: ( - -int(item.get("total_calls") or 0), - -int(item.get("failed_calls") or 0), - str(item.get("key") or ""), - ), - ) - - @classmethod - def get_runtime_breakdown(cls) -> Dict[str, Any]: - """返回最近窗口 LLM 调用的多维度聚合分析结果。 - - 返回结构专门给后台“AI 成本与策略中心”第一阶段使用: - 1. 先围绕 scene / backend / provider / model 做聚合; - 2. 重点回答成功率、平均耗时、失败次数、最近错误; - 3. 暂不承诺长期留存,只服务于最近窗口的运行分析。 - """ - with cls._runtime_lock: - rows = list(cls._runtime_metrics) - - snapshot = cls.get_runtime_snapshot() - return { - **snapshot, - "rows": rows, - "by_scene": cls._build_runtime_breakdown_rows(rows, "scene"), - "by_backend": cls._build_runtime_breakdown_rows(rows, "backend"), - "by_provider": cls._build_runtime_breakdown_rows(rows, "provider"), - "by_model": cls._build_runtime_breakdown_rows(rows, "model"), - } - def is_available(self) -> bool: if not self.enabled: return False