from flask import Blueprint, render_template, jsonify, request, send_from_directory, current_app, Response from .auth import login_required from loguru import logger import os import time import subprocess from datetime import datetime import platform import psutil from collections import deque import gzip import json import yaml import toml from utils.markdown_to_image import get_md2img_health_snapshot, warmup_md2img_browser_sync from utils.ai.llm_registry import LLMRegistry from base.plugin_common.plugin_interface import PluginStatus from utils.ai.unified_llm import UnifiedLLMClient from utils.decorator.async_job import async_job # 创建系统信息蓝图 system_bp = Blueprint('system', __name__) # 记录应用启动时间 APP_START_TIME = time.time() def _system_config_path() -> str: return os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..', '..', 'config.yaml')) def _load_system_yaml() -> dict: config_path = _system_config_path() if not os.path.exists(config_path): return {} with open(config_path, 'r', encoding='utf-8') as f: return yaml.safe_load(f) or {} def _save_system_yaml(config_obj: dict) -> None: config_path = _system_config_path() with open(config_path, 'w', encoding='utf-8') as f: yaml.safe_dump(config_obj, f, allow_unicode=True, sort_keys=False) def _safe_int(value, default: int = 0) -> int: """把数据库 / Redis 返回的字符串数字安全转成整数。""" try: if value in (None, ""): return default return int(float(value)) except (TypeError, ValueError): return default def _safe_float(value, default: float = 0.0) -> float: """把数据库 / Redis 返回的值安全转成浮点数。""" try: if value in (None, ""): return default return float(value) except (TypeError, ValueError): return default def _format_bytes_to_mb(value: int) -> float: """把字节数转换为 MB,保留两位小数便于首页摘要展示。""" return round((_safe_float(value, 0.0) / 1024 / 1024), 2) def _extract_mysql_runtime_snapshot(db_manager) -> dict: """采集 MySQL 运行态摘要。 首页目标不是替代 DBA 工具,而是让管理员一眼判断: 1. 数据库是不是活着; 2. 当前连接压力高不高; 3. 当前库规模是否已经明显变大; 4. 有没有必要继续深入到更专业的监控页排查。 """ snapshot = { "status": "healthy", "summary": "连接正常", # 这里不要假定 db_manager 一定实现了扩展 helper。 # 当前仓库存在多种 DBConnectionManager 版本,因此先走 getattr,再在 SQL 查询里补足真实值。 "database": ( str(getattr(db_manager, "get_mysql_database_name", lambda: "")() or "").strip() if db_manager is not None else "" ), "version": "", "threads_connected": 0, "threads_running": 0, "max_connections": 0, "connection_usage_percent": 0.0, "questions_per_second": 0.0, "uptime_seconds": 0, "table_count": 0, "schema_size_mb": 0.0, "slow_query_threshold_ms": ( int(getattr(db_manager, "get_slow_query_threshold_ms", lambda default=300: default)(300)) if db_manager is not None else 300 ), } mysql_conn = db_manager.get_mysql_connection() try: with mysql_conn.cursor(dictionary=True) as cursor: # 基础探活与版本识别: # 1. SELECT VERSION() 成本极低; # 2. 相比只做 SELECT 1,它还能顺便拿到版本信息; # 3. 首页卡片里显示版本,方便线上排查“是不是某台库版本不一致”。 cursor.execute("SELECT VERSION() AS version, DATABASE() AS database_name") version_row = cursor.fetchone() or {} snapshot["version"] = str(version_row.get("version") or "").strip() snapshot["database"] = str(version_row.get("database_name") or snapshot["database"] or "").strip() cursor.execute( """ SHOW GLOBAL STATUS WHERE Variable_name IN ('Threads_connected', 'Threads_running', 'Questions', 'Uptime') """ ) status_rows = cursor.fetchall() or [] status_map = { str(row.get("Variable_name") or "").strip(): row.get("Value") for row in status_rows } cursor.execute( """ SHOW GLOBAL VARIABLES WHERE Variable_name IN ('max_connections') """ ) variable_rows = cursor.fetchall() or [] variable_map = { str(row.get("Variable_name") or "").strip(): row.get("Value") for row in variable_rows } # information_schema 聚合虽然比 SELECT 1 重一点,但仍属于轻量级元信息查询: # 1. 只在首页 30 秒级刷新一次,成本可接受; # 2. 能直接给出当前业务库表数量与体量变化; # 3. 对判断“是不是消息表膨胀导致后台变慢”很有帮助。 cursor.execute( """ SELECT COUNT(*) AS table_count, COALESCE(SUM(data_length + index_length), 0) AS schema_size_bytes FROM information_schema.tables WHERE table_schema = DATABASE() """ ) schema_row = cursor.fetchone() or {} snapshot["threads_connected"] = _safe_int(status_map.get("Threads_connected")) snapshot["threads_running"] = _safe_int(status_map.get("Threads_running")) snapshot["max_connections"] = _safe_int(variable_map.get("max_connections")) snapshot["uptime_seconds"] = _safe_int(status_map.get("Uptime")) total_questions = _safe_int(status_map.get("Questions")) if snapshot["uptime_seconds"] > 0: snapshot["questions_per_second"] = round(total_questions / snapshot["uptime_seconds"], 2) if snapshot["max_connections"] > 0: snapshot["connection_usage_percent"] = round( (snapshot["threads_connected"] / snapshot["max_connections"]) * 100, 1, ) snapshot["table_count"] = _safe_int(schema_row.get("table_count")) snapshot["schema_size_mb"] = _format_bytes_to_mb(schema_row.get("schema_size_bytes")) if snapshot["connection_usage_percent"] >= 80 or snapshot["threads_running"] >= 12: snapshot["status"] = "warning" snapshot["summary"] = ( f"连接压力偏高:已连接 {snapshot['threads_connected']} / {snapshot['max_connections']}," f"运行中线程 {snapshot['threads_running']}" ) else: snapshot["summary"] = ( f"连接正常:已连接 {snapshot['threads_connected']} / {snapshot['max_connections'] or '-'}," f"QPS {snapshot['questions_per_second']}" ) return snapshot except Exception as mysql_error: snapshot["status"] = "danger" snapshot["summary"] = f"MySQL 探测失败: {mysql_error}" return snapshot finally: mysql_conn.close() def _extract_redis_runtime_snapshot(db_manager) -> dict: """采集 Redis 运行态摘要。""" redis_config = getattr(db_manager, "redis_config", {}) or {} snapshot = { "status": "healthy", "summary": "连接正常", "db_index": _safe_int(redis_config.get("db", 0)), "key_count": 0, "connected_clients": 0, "blocked_clients": 0, "ops_per_sec": 0, "used_memory_human": "", "used_memory_peak_human": "", "memory_usage_percent": 0.0, "uptime_seconds": 0, "hit_rate_percent": 0.0, } try: redis_conn = db_manager.get_redis_connection() redis_conn.ping() info = redis_conn.info() or {} snapshot["key_count"] = _safe_int(redis_conn.dbsize()) snapshot["connected_clients"] = _safe_int(info.get("connected_clients")) snapshot["blocked_clients"] = _safe_int(info.get("blocked_clients")) snapshot["ops_per_sec"] = _safe_int(info.get("instantaneous_ops_per_sec")) snapshot["used_memory_human"] = str(info.get("used_memory_human") or "").strip() snapshot["used_memory_peak_human"] = str(info.get("used_memory_peak_human") or "").strip() snapshot["uptime_seconds"] = _safe_int(info.get("uptime_in_seconds")) maxmemory = _safe_int(info.get("maxmemory")) used_memory = _safe_int(info.get("used_memory")) if maxmemory > 0: snapshot["memory_usage_percent"] = round((used_memory / maxmemory) * 100, 1) keyspace_hits = _safe_int(info.get("keyspace_hits")) keyspace_misses = _safe_int(info.get("keyspace_misses")) if (keyspace_hits + keyspace_misses) > 0: snapshot["hit_rate_percent"] = round( (keyspace_hits / (keyspace_hits + keyspace_misses)) * 100, 1, ) if snapshot["blocked_clients"] > 0 or snapshot["memory_usage_percent"] >= 80: snapshot["status"] = "warning" snapshot["summary"] = ( f"缓存压力需关注:keys {snapshot['key_count']}," f"clients {snapshot['connected_clients']},ops/s {snapshot['ops_per_sec']}" ) else: snapshot["summary"] = ( f"缓存正常:keys {snapshot['key_count']}," f"clients {snapshot['connected_clients']},ops/s {snapshot['ops_per_sec']}" ) return snapshot except Exception as redis_error: snapshot["status"] = "danger" snapshot["summary"] = f"Redis 探测失败: {redis_error}" return snapshot def _parse_snapshot_datetime(value: str | None) -> datetime | None: """把首页摘要里常用的时间字符串安全转换为 datetime。""" text = str(value or "").strip() if not text: return None try: return datetime.strptime(text, "%Y-%m-%d %H:%M:%S") except ValueError: return None def _count_enabled_runtime_items(items) -> int: """统计启用项数量。""" rows = [] if isinstance(items, dict): rows = list(items.values()) elif isinstance(items, list): rows = list(items) count = 0 for row in rows: if not isinstance(row, dict): continue if "enabled" not in row or bool(row.get("enabled", True)): count += 1 return count def _extract_llm_catalog_summary() -> dict: """提取首页 LLM 路由配置摘要。""" try: catalog = LLMRegistry.get_catalog() or {} if catalog: providers = catalog.get("providers", {}) or {} dify_apps = catalog.get("dify_apps", {}) or {} backends = catalog.get("backends", {}) or {} scenes = catalog.get("scenes", {}) or {} default_scene = str(catalog.get("default_scene") or "").strip() default_backend = str(LLMRegistry.get_scene_backend_name(default_scene) or "").strip() if default_scene else "" return { "provider_count": _count_enabled_runtime_items(providers), "scene_count": _count_enabled_runtime_items(scenes), "target_count": _count_enabled_runtime_items(backends) + _count_enabled_runtime_items(dify_apps), "default_scene": default_scene, "default_backend": default_backend, "has_routing": _count_enabled_runtime_items(scenes) > 0, } legacy_llm = LLMRegistry.get_llm_config() or {} scenes = legacy_llm.get("scenes", {}) or {} backends = legacy_llm.get("backends", {}) or {} default_backend = str(legacy_llm.get("default_backend") or "").strip() return { "provider_count": 0, "scene_count": len(scenes) if isinstance(scenes, dict) else 0, "target_count": len(backends) if isinstance(backends, dict) else 0, "default_scene": "", "default_backend": default_backend, "has_routing": bool(scenes) or bool(default_backend), } except Exception as llm_catalog_error: logger.warning(f"提取 LLM 路由摘要失败: {llm_catalog_error}") return { "provider_count": 0, "scene_count": 0, "target_count": 0, "default_scene": "", "default_backend": "", "has_routing": False, } def _extract_ai_runtime_snapshot() -> dict: """构建首页 LLM 运行态摘要。""" runtime_snapshot = UnifiedLLMClient.get_runtime_snapshot() or {} last_call = dict(runtime_snapshot.get("last_call") or {}) catalog_summary = _extract_llm_catalog_summary() total_calls = _safe_int(runtime_snapshot.get("total_calls")) failed_calls = _safe_int(runtime_snapshot.get("failed_calls")) success_rate = _safe_float(runtime_snapshot.get("success_rate")) avg_latency_ms = _safe_float(runtime_snapshot.get("avg_latency_ms")) last_error = str(runtime_snapshot.get("last_error") or "").strip() snapshot = { **runtime_snapshot, "last_call": last_call, "provider_count": catalog_summary.get("provider_count", 0), "scene_count": catalog_summary.get("scene_count", 0), "target_count": catalog_summary.get("target_count", 0), "default_scene": catalog_summary.get("default_scene", ""), "default_backend": catalog_summary.get("default_backend", ""), "has_routing": bool(catalog_summary.get("has_routing")), "last_provider": str(last_call.get("provider") or "").strip(), "last_backend": str(last_call.get("backend") or "").strip(), "last_scene": str(last_call.get("scene") or "").strip(), "last_model": str(last_call.get("model") or "").strip(), "last_timestamp": str(last_call.get("timestamp") or "").strip(), "last_latency_ms": _safe_float(last_call.get("latency_ms")), "last_error": last_error, } if not snapshot["has_routing"]: snapshot["status"] = "warning" snapshot["summary"] = "当前未发现完整的 LLM 路由配置,建议先检查默认场景与后端绑定" return snapshot if total_calls <= 0: snapshot["status"] = "warning" snapshot["summary"] = ( f"已配置 {snapshot['scene_count']} 个场景、{snapshot['target_count']} 个目标," "最近窗口内暂无统一 LLM 调用记录" ) return snapshot if failed_calls >= total_calls and total_calls > 0: snapshot["status"] = "danger" snapshot["summary"] = ( f"最近 {total_calls} 次调用全部失败,成功率 {success_rate:.2f}%," f"平均耗时 {avg_latency_ms:.2f}ms" ) return snapshot if failed_calls > 0 or last_error: snapshot["status"] = "warning" snapshot["summary"] = ( f"最近 {total_calls} 次调用中失败 {failed_calls} 次,成功率 {success_rate:.2f}%," f"平均耗时 {avg_latency_ms:.2f}ms" ) return snapshot snapshot["status"] = "healthy" snapshot["summary"] = ( f"最近 {total_calls} 次调用全部成功,成功率 {success_rate:.2f}%," f"平均耗时 {avg_latency_ms:.2f}ms" ) return snapshot def _extract_scheduler_runtime_snapshot() -> dict: """聚合 async_job 运行态,生成首页任务调度摘要。""" runtime_rows = async_job.get_jobs_snapshot() next_run_candidates = [] failed_rows = [] system_job_count = 0 plugin_job_count = 0 for row in runtime_rows: job_key = str(row.get("job_key") or "").strip() owner_name = str(row.get("owner_name") or "system").strip().lower() next_run_at = _parse_snapshot_datetime(row.get("next_run_at")) last_status = str(row.get("last_status") or "").strip().lower() if job_key.startswith("plugin_schedule:") or owner_name != "system": plugin_job_count += 1 else: system_job_count += 1 if bool(row.get("enabled")) and next_run_at: next_run_candidates.append(next_run_at) if last_status in {"failed", "invalid_schedule"}: failed_rows.append(row) latest_failed_row = {} if failed_rows: failed_rows.sort( key=lambda row: ( _parse_snapshot_datetime(row.get("updated_at")) or _parse_snapshot_datetime(row.get("last_run_at")) or datetime.min ), reverse=True, ) latest_failed_row = failed_rows[0] invalid_jobs = sum( 1 for row in runtime_rows if str(row.get("last_status") or "").strip().lower() == "invalid_schedule" ) total_jobs = len(runtime_rows) enabled_jobs = sum(1 for row in runtime_rows if bool(row.get("enabled"))) running_jobs = sum(1 for row in runtime_rows if bool(row.get("running"))) failed_jobs = len(failed_rows) paused_jobs = total_jobs - enabled_jobs never_run_jobs = sum(1 for row in runtime_rows if str(row.get("last_status") or "").strip().lower() == "never") next_run_at_text = min(next_run_candidates).strftime("%Y-%m-%d %H:%M:%S") if next_run_candidates else "" latest_failed_error = str(latest_failed_row.get("last_error") or "").strip() if len(latest_failed_error) > 120: latest_failed_error = f"{latest_failed_error[:117]}..." snapshot = { "status": "healthy", "summary": "任务调度运行正常", "total_jobs": total_jobs, "enabled_jobs": enabled_jobs, "running_jobs": running_jobs, "failed_jobs": failed_jobs, "invalid_jobs": invalid_jobs, "paused_jobs": paused_jobs, "never_run_jobs": never_run_jobs, "system_job_count": system_job_count, "plugin_job_count": plugin_job_count, "next_run_at": next_run_at_text, "latest_failed_job_name": str(latest_failed_row.get("name") or "").strip(), "latest_failed_error": latest_failed_error, } if total_jobs <= 0: snapshot["status"] = "warning" snapshot["summary"] = "当前没有加载任何定时任务" return snapshot if invalid_jobs > 0: snapshot["status"] = "danger" snapshot["summary"] = f"发现 {invalid_jobs} 个任务调度配置非法,建议立即检查任务页" return snapshot if failed_jobs > 0: snapshot["status"] = "warning" snapshot["summary"] = ( f"最近有 {failed_jobs} 个任务执行失败," f"下一次执行 {next_run_at_text or '暂未计算'}" ) return snapshot if enabled_jobs <= 0: snapshot["status"] = "warning" snapshot["summary"] = "任务已加载,但当前没有启用中的调度任务" return snapshot if running_jobs > 0: snapshot["summary"] = ( f"当前有 {running_jobs} 个任务执行中," f"下一次执行 {next_run_at_text or '暂未计算'}" ) return snapshot snapshot["summary"] = f"已启用 {enabled_jobs} 个任务,下一次执行 {next_run_at_text or '暂未计算'}" return snapshot def _legacy_llm_to_catalog(legacy_llm: dict) -> dict: """把旧 llm(backends/scenes) 结构转换为新目录结构(仅用于兜底展示)。 说明: 1. 该转换不写库,只用于当目录表不可用时让后台页面仍可展示; 2. 规则与 DB bootstrap 一致:dify backend 拆成 provider+dify_app,其他保留为 backend。 """ llm = legacy_llm or {} old_backends = llm.get("backends", {}) or {} old_scenes = llm.get("scenes", {}) or {} default_backend = str(llm.get("default_backend") or "").strip() providers = [] dify_apps = [] backends = [] scenes = [] dify_template_cfg = None for backend in old_backends.values(): if isinstance(backend, dict) and str(backend.get("provider") or "").strip().lower() == "dify": dify_template_cfg = dict(backend) break if dify_template_cfg: providers.append( { "name": "dify_workflow_default", "provider_type": "dify", "enabled": True, "config": { "provider": "dify", "api_base_url": dify_template_cfg.get("api_base_url", ""), "endpoint": dify_template_cfg.get("endpoint", "workflows/run"), "mode": dify_template_cfg.get("mode", "workflow"), "response_mode": dify_template_cfg.get("response_mode", "blocking"), "request_timeout": dify_template_cfg.get("request_timeout", 60), "max_retries": dify_template_cfg.get("max_retries", 3), "retry_delay_seconds": dify_template_cfg.get("retry_delay_seconds", 1.0), }, } ) for backend_name, backend_cfg in old_backends.items(): if not isinstance(backend_cfg, dict): continue provider = str(backend_cfg.get("provider") or "").strip().lower() if provider == "dify": dify_apps.append( { "name": str(backend_name), "provider_template": "dify_workflow_default", "app_key": str(backend_cfg.get("api_key") or "").strip(), "workflow_output_key": str(backend_cfg.get("workflow_output_key") or "text").strip(), "enabled": True, "config": { "endpoint": backend_cfg.get("endpoint", ""), "mode": backend_cfg.get("mode", ""), "response_mode": backend_cfg.get("response_mode", ""), "request_timeout": backend_cfg.get("request_timeout", ""), }, } ) else: backends.append( { "name": str(backend_name), "enabled": True, "config": dict(backend_cfg), } ) if isinstance(old_scenes, dict) and old_scenes: for scene_name, backend_name in old_scenes.items(): scene_name = str(scene_name or "").strip() backend_name = str(backend_name or "").strip() if not scene_name or not backend_name: continue backend_cfg = old_backends.get(backend_name, {}) or {} provider = str((backend_cfg or {}).get("provider") or "").strip().lower() scenes.append( { "name": scene_name, "target_type": "dify_app" if provider == "dify" else "backend", "target_ref": backend_name, "enabled": True, } ) elif default_backend: default_cfg = old_backends.get(default_backend, {}) or {} provider = str((default_cfg or {}).get("provider") or "").strip().lower() scenes.append( { "name": "main.default", "target_type": "dify_app" if provider == "dify" else "backend", "target_ref": default_backend, "enabled": True, } ) default_scene = scenes[0]["name"] if scenes else "" return { "default_scene": default_scene, "providers": providers, "dify_apps": dify_apps, "backends": backends, "scenes": scenes, } def _load_llm_catalog_runtime() -> dict: """读取运行时 LLM 目录配置(优先 MySQL 新模型)。""" try: server = current_app.dashboard_server llm_catalog_db = getattr(server, "llm_catalog_db", None) if llm_catalog_db: catalog = llm_catalog_db.get_catalog() or {} if catalog and catalog.get("scenes"): return catalog except Exception as e: logger.warning(f"从 MySQL 读取 LLM 目录失败,回退 YAML: {e}") # 兜底:把 YAML 的 legacy llm 转成目录结构给后台展示。 config_obj = _load_system_yaml() llm_config = config_obj.get("llm", {}) or {} if not isinstance(llm_config, dict): llm_config = {} return _legacy_llm_to_catalog(llm_config) def _save_llm_catalog_runtime(catalog: dict) -> None: """保存运行时 LLM 目录配置到 MySQL。""" server = current_app.dashboard_server llm_catalog_db = getattr(server, "llm_catalog_db", None) if not llm_catalog_db: raise RuntimeError("llm_catalog_db 未初始化,无法保存 LLM 目录到 MySQL") ok = llm_catalog_db.save_catalog(catalog or {}) if not ok: raise RuntimeError("保存 LLM 目录到 MySQL 失败") def _plugins_root_path() -> str: """返回插件根目录绝对路径。""" return os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..', '..', 'plugins')) def _scan_plugin_llm_usage() -> list: """扫描各插件 config.toml,提取插件与 LLM 场景的引用关系。 说明: 1. 该扫描仅用于后台可视化,不会改写插件配置; 2. 严格模式只采集 scene:顶层 section 写法,或嵌套在 llm/api/report_api 等节点; 3. 返回结果用于“插件 -> scene -> backend”依赖拓扑展示。 """ plugins_root = _plugins_root_path() if not os.path.isdir(plugins_root): return [] usages = [] def _collect_refs(plugin_name: str, section_name: str, payload: dict) -> None: """从单个配置节点收集 scene 引用。""" if not isinstance(payload, dict): return scene_name = str(payload.get("scene") or "").strip() if not scene_name: return usages.append({ "plugin": plugin_name, "section": section_name, "scene": scene_name, }) for item in sorted(os.listdir(plugins_root)): plugin_dir = os.path.join(plugins_root, item) if not os.path.isdir(plugin_dir): continue config_path = os.path.join(plugin_dir, "config.toml") if not os.path.exists(config_path): continue try: config_obj = toml.load(config_path) or {} except Exception as e: logger.warning(f"扫描插件 LLM 依赖失败: plugin={item}, path={config_path}, error={e}") continue # 优先扫描每个 section:兼容 [Dify] / [api] / [Douyu.report_api] 等写法。 for section_name, section_value in config_obj.items(): if isinstance(section_value, dict): _collect_refs(item, str(section_name), section_value) # 二层兜底:处理 llm/api/report_api 等嵌套节点。 for nested_name, nested_value in section_value.items(): if isinstance(nested_value, dict): _collect_refs(item, f"{section_name}.{nested_name}", nested_value) # 顶层兜底:兼容极少数直接写在根节点的 scene。 _collect_refs(item, "__root__", config_obj if isinstance(config_obj, dict) else {}) # 去重:同插件同 section 仅保留一条记录,避免前后兜底重复。 unique = {} for row in usages: key = f"{row.get('plugin')}::{row.get('section')}::{row.get('scene')}" unique[key] = row return sorted(unique.values(), key=lambda x: (x.get("plugin", ""), x.get("section", ""))) def _build_llm_topology() -> dict: """构建 LLM 拓扑视图(供后台页面直观展示依赖关系)。""" catalog = _load_llm_catalog_runtime() providers = {str(item.get("name") or "").strip(): item for item in (catalog.get("providers", []) or [])} dify_apps = {str(item.get("name") or "").strip(): item for item in (catalog.get("dify_apps", []) or [])} backends = {str(item.get("name") or "").strip(): item for item in (catalog.get("backends", []) or [])} scenes = {str(item.get("name") or "").strip(): item for item in (catalog.get("scenes", []) or [])} default_scene = str(catalog.get("default_scene") or "").strip() plugin_usages = _scan_plugin_llm_usage() topology_rows = [] for usage in plugin_usages: scene_name = str(usage.get("scene") or "").strip() scene = scenes.get(scene_name, {}) or {} target_type = str(scene.get("target_type") or "").strip().lower() target_ref = str(scene.get("target_ref") or "").strip() resolved_provider = "" resolved_target = target_ref valid_target = False if target_type == "dify_app": app = dify_apps.get(target_ref, {}) or {} provider_name = str(app.get("provider_template") or "").strip() provider = providers.get(provider_name, {}) or {} resolved_provider = str(provider.get("provider_type") or "").strip() valid_target = bool(app and provider) elif target_type == "backend": backend = backends.get(target_ref, {}) or {} backend_cfg = (backend.get("config") or {}) if isinstance(backend, dict) else {} resolved_provider = str((backend_cfg or {}).get("provider") or "").strip() valid_target = bool(backend) topology_rows.append({ "plugin": usage.get("plugin", ""), "section": usage.get("section", ""), "scene": scene_name, "target_type": target_type or "-", "target_ref": resolved_target or "-", "provider": resolved_provider or "-", "valid_scene": bool(scene_name in scenes), "valid_target": valid_target, }) return { "default_scene": default_scene, "providers": catalog.get("providers", []) or [], "dify_apps": catalog.get("dify_apps", []) or [], "backends": catalog.get("backends", []) or [], "scenes": catalog.get("scenes", []) or [], "plugin_usages": plugin_usages, "topology_rows": topology_rows, } @system_bp.route('/api_docs') @login_required def api_docs(): src = request.args.get('src') if not src: try: server = current_app.dashboard_server cfg = getattr(server.robot, "ipad_config", {}) or {} src = cfg.get("server_url", "http://127.0.0.1:8059/") except Exception: src = "http://127.0.0.1:8059/" return render_template('api_docs.html', src_url=src) @system_bp.route('/system_status') @login_required def system_status(): src = request.args.get('src') if not src: try: server = current_app.dashboard_server glances = getattr(server.robot, "config").glances if hasattr(server.robot, "config") else {} host = glances.get("host", "127.0.0.1") port = glances.get("port", 61208) src = f"http://{host}:{port}/" except Exception: src = "http://127.0.0.1:61208/" return render_template('system_status.html', src_url=src) @system_bp.route('/system_llm') @login_required def system_llm(): return render_template('system_llm.html') # 页面路由 @system_bp.route('/wx_logs') @login_required def wx_logs(): return render_template('wx_logs.html') # API路由 @system_bp.route('/api/system_info') @login_required def api_system_info(): try: # 获取系统信息 system_info = { "os": platform.system(), "os_version": platform.version(), "python_version": platform.python_version(), "cpu_usage": psutil.cpu_percent(), "memory_usage": psutil.virtual_memory().percent, "disk_usage": psutil.disk_usage('/').percent, "uptime": time.time() - APP_START_TIME, # 使用应用启动时间计算运行时长 "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"), "open_files": len(psutil.Process(os.getpid()).open_files()) } return jsonify({"success": True, "data": system_info}) except Exception as e: logger.error(f"获取系统信息失败: {e}") return jsonify({"success": False, "error": str(e)}), 500 @system_bp.route('/api/system_health_summary') @login_required def api_system_health_summary(): """聚合首页可观测性所需的关键健康信号。""" try: server = current_app.dashboard_server robot = getattr(server, "robot", None) plugin_manager = getattr(server, "plugin_manager", None) plugin_map = getattr(plugin_manager, "plugins", {}) or {} # 统计插件运行状态,便于首页快速判断“加载了多少、真正跑起来多少、是否有异常插件”。 plugin_status_counter = { "total": len(plugin_map), "running": 0, "loaded": 0, "stopped": 0, "error": 0, "unloaded": 0, "unknown": 0, } for plugin in plugin_map.values(): status = getattr(plugin, "status", None) if status == PluginStatus.RUNNING: plugin_status_counter["running"] += 1 elif status == PluginStatus.LOADED: plugin_status_counter["loaded"] += 1 elif status == PluginStatus.STOPPED: plugin_status_counter["stopped"] += 1 elif status == PluginStatus.ERROR: plugin_status_counter["error"] += 1 elif status == PluginStatus.UNLOADED: plugin_status_counter["unloaded"] += 1 else: plugin_status_counter["unknown"] += 1 # 错误数量直接复用现有统计库,避免为了首页卡片再单独写一套 SQL。 _, recent_error_count = server.stats_db.get_error_logs(days=1, page=1, limit=1) # 基础设施健康: # 1. MySQL / Redis 都在这里做“首页摘要级”探测,而不是完整深度巡检; # 2. 除了连通性,还补充少量负载指标,方便管理员快速判断是否需要继续下钻; # 3. 即使探测失败也只反馈到看板,不影响主接口整体返回。 mysql_snapshot = _extract_mysql_runtime_snapshot(server.db_manager) redis_snapshot = _extract_redis_runtime_snapshot(server.db_manager) # 首页只需要“够判断”的轻量结论,因此统一产出 status + summary 文本,前端无需重复拼装业务规则。 robot_running = bool(getattr(robot, "ipad_running", False)) robot_nickname = str(getattr(robot, "nickname", "") or "").strip() robot_wxid = str(getattr(robot, "wxid", "") or "").strip() robot_summary = "已连接并正在处理消息" if robot_running else "未连接或主循环未运行" if robot_nickname or robot_wxid: robot_summary = f"{robot_summary} · {robot_nickname or robot_wxid}" if plugin_status_counter["error"] > 0: plugin_status = "warning" plugin_summary = f"异常 {plugin_status_counter['error']} 个,运行中 {plugin_status_counter['running']} / {plugin_status_counter['total']}" elif plugin_status_counter["running"] == 0 and plugin_status_counter["total"] > 0: plugin_status = "warning" plugin_summary = f"暂无运行中插件,共加载 {plugin_status_counter['total']} 个" else: plugin_status = "healthy" plugin_summary = f"运行中 {plugin_status_counter['running']} / {plugin_status_counter['total']}" if recent_error_count > 0: error_status = "warning" error_summary = f"近 24 小时记录到 {recent_error_count} 条异常" else: error_status = "healthy" error_summary = "近 24 小时未记录到异常" # 首页 AI 卡片升级为“运行态 + 路由摘要”,仍然保持被动观测,不主动探活。 ai_runtime = _extract_ai_runtime_snapshot() # Markdown 转图更适合保留在专门页面里排障,首页右侧改成更通用的任务调度摘要。 scheduler_runtime = _extract_scheduler_runtime_snapshot() return jsonify({ "success": True, "data": { "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"), "robot": { "status": "healthy" if robot_running else "danger", "running": robot_running, "nickname": robot_nickname, "wxid": robot_wxid, "summary": robot_summary, }, "plugins": { "status": plugin_status, "summary": plugin_summary, **plugin_status_counter, }, "errors": { "status": error_status, "recent_24h_count": recent_error_count, "summary": error_summary, }, "infrastructure": { "status": ( "danger" if "danger" in {mysql_snapshot.get("status"), redis_snapshot.get("status")} else ("warning" if "warning" in {mysql_snapshot.get("status"), redis_snapshot.get("status")} else "healthy") ), "summary": ( "MySQL / Redis 均正常" if mysql_snapshot.get("status") == "healthy" and redis_snapshot.get("status") == "healthy" else ( "基础设施连接正常,但部分负载指标需要关注" if mysql_snapshot.get("status") != "danger" and redis_snapshot.get("status") != "danger" else "存在基础设施连接异常" ) ), "mysql": mysql_snapshot, "redis": redis_snapshot, }, "ai_runtime": { **ai_runtime, }, "scheduler": { **scheduler_runtime, }, } }) except Exception as e: logger.error(f"获取系统健康摘要失败: {e}") return jsonify({"success": False, "error": str(e)}), 500 @system_bp.route('/api/wx_logs') @login_required def api_wx_logs(): try: log_type = request.args.get('type', 'info') # 默认显示info日志 lines = request.args.get('lines', 100, type=int) # 默认显示最后100行 # 修正日志文件路径计算,获取项目根目录 project_root = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..', '..','logs')) if log_type == 'error': log_file = os.path.join(project_root, 'wx_error.log') elif log_type == 'debug': log_file = os.path.join(project_root, 'wx_debug.log') else: log_file = os.path.join(project_root, 'wx_info.log') log_content = [] if os.path.exists(log_file): try: chunk_size = 8192 with open(log_file, 'rb') as f: f.seek(0, os.SEEK_END) size = f.tell() buffer = b"" pos = size while pos > 0 and buffer.count(b'\n') <= lines: read_size = chunk_size if pos >= chunk_size else pos pos -= read_size f.seek(pos) buffer = f.read(read_size) + buffer log_content = [b.decode('utf-8', errors='ignore') for b in buffer.splitlines()[-lines:]] except Exception as e: logger.error(f"高效读取日志失败,回退到常规方式: {e}") with open(log_file, 'r', encoding='utf-8', errors='ignore') as f: log_content = list(deque(f, lines)) else: logger.warning(f"日志文件不存在: {log_file}") # 尝试列出项目根目录下的所有日志文件,帮助调试 try: all_files = [f for f in os.listdir(project_root) if f.endswith('.log')] logger.info(f"项目根目录下的日志文件: {all_files}") except Exception as e: logger.error(f"列出目录文件失败: {e}") payload = { "success": True, "data": { "log_type": log_type, "log_file": log_file, "content": log_content, "lines": len(log_content) } } accept = request.headers.get('Accept-Encoding', '') if 'gzip' in accept.lower(): body = json.dumps(payload, ensure_ascii=False).encode('utf-8') gz = gzip.compress(body, compresslevel=6) resp = Response(gz, mimetype='application/json') resp.headers['Content-Encoding'] = 'gzip' return resp return jsonify(payload) except Exception as e: logger.error(f"获取微信日志失败: {e}") return jsonify({"success": False, "error": str(e)}), 500 # 在现有路由下添加 @system_bp.route('/api/current_user_info', methods=['GET']) @login_required def get_current_user_info(): """获取当前登录的微信用户信息""" dashboard_server = current_app.dashboard_server result = dashboard_server.get_current_user_info() return jsonify(result) @system_bp.route('/api/system/config/raw', methods=['GET']) @login_required def get_system_config_raw(): try: config_path = _system_config_path() with open(config_path, 'r', encoding='utf-8') as f: config_text = f.read() # 展示运行时目录中的目标对象(backend+dify_app),便于调试 scene 绑定。 catalog = _load_llm_catalog_runtime() backend_names = [str(item.get("name") or "").strip() for item in (catalog.get("backends", []) or [])] app_names = [f"dify_app::{str(item.get('name') or '').strip()}" for item in (catalog.get("dify_apps", []) or [])] return jsonify({ "success": True, "data": config_text, "path": config_path, "llm_backends": sorted([name for name in backend_names + app_names if name]), }) except Exception as e: logger.error(f"读取系统配置失败: {e}") return jsonify({"success": False, "message": str(e)}), 500 @system_bp.route('/api/system/config/update', methods=['POST']) @login_required def update_system_config(): try: server = current_app.dashboard_server data = request.get_json() or {} config_text = data.get("config_text") if config_text is None: return jsonify({"success": False, "message": "缺少配置内容"}), 400 yaml.safe_load(config_text) config_path = _system_config_path() with open(config_path, 'w', encoding='utf-8') as f: f.write(config_text) if getattr(server, "robot", None) and getattr(server.robot, "config", None): server.robot.config.reload() # 保存 YAML 后立刻把运行时依赖对象同步一遍,避免必须重启进程才能读到新值。 server.robot.apply_runtime_config(reload_catalog=True) else: # 即便当前没有可用 robot 实例,也尽量把 LLM 路由缓存清掉,避免后续请求短时间内读旧值。 LLMRegistry.invalidate_cache() return jsonify({"success": True, "message": "全局配置已保存并应用到运行时"}) except Exception as e: logger.error(f"保存系统配置失败: {e}") return jsonify({"success": False, "message": str(e)}), 500 @system_bp.route('/api/system/llm_config', methods=['GET']) @login_required def get_system_llm_config(): try: catalog = _load_llm_catalog_runtime() providers = sorted((catalog.get("providers", []) or []), key=lambda item: str(item.get("name") or "")) dify_apps = sorted((catalog.get("dify_apps", []) or []), key=lambda item: str(item.get("name") or "")) backends = sorted((catalog.get("backends", []) or []), key=lambda item: str(item.get("name") or "")) scenes = sorted((catalog.get("scenes", []) or []), key=lambda item: str(item.get("name") or "")) topology = _build_llm_topology() return jsonify({ "success": True, "data": { "default_scene": catalog.get("default_scene", ""), "providers": providers, "dify_apps": dify_apps, "backends": backends, "scenes": scenes, "topology_rows": topology.get("topology_rows", []), "plugin_usages": topology.get("plugin_usages", []), # 新目录模型主存储在 MySQL。 "config_path": ( "mysql:t_llm_provider_templates + t_llm_dify_apps + " "t_llm_backends + t_llm_scenes (fallback yaml)" ), } }) except Exception as e: logger.error(f"读取全局 LLM 配置失败: {e}") return jsonify({"success": False, "message": str(e)}), 500 @system_bp.route('/api/system/llm_config', methods=['POST']) @login_required def update_system_llm_config(): try: server = current_app.dashboard_server data = request.get_json() or {} default_scene = str(data.get("default_scene") or "").strip() provider_list = data.get("providers", []) or [] dify_app_list = data.get("dify_apps", []) or [] backend_list = data.get("backends", []) or [] scene_list = data.get("scenes", []) or [] if not isinstance(provider_list, list): return jsonify({"success": False, "message": "providers 格式不正确"}), 400 if not isinstance(dify_app_list, list): return jsonify({"success": False, "message": "dify_apps 格式不正确"}), 400 if not isinstance(backend_list, list): return jsonify({"success": False, "message": "backends 格式不正确"}), 400 if not isinstance(scene_list, list): return jsonify({"success": False, "message": "scenes 格式不正确"}), 400 # 目录级校验:先收集名字集合,便于 scene target 引用校验。 provider_names = { str((item or {}).get("name") or "").strip() for item in provider_list if isinstance(item, dict) and str((item or {}).get("name") or "").strip() } dify_app_names = { str((item or {}).get("name") or "").strip() for item in dify_app_list if isinstance(item, dict) and str((item or {}).get("name") or "").strip() } backend_names = { str((item or {}).get("name") or "").strip() for item in backend_list if isinstance(item, dict) and str((item or {}).get("name") or "").strip() } for app in dify_app_list: if not isinstance(app, dict): continue app_name = str(app.get("name") or "").strip() if not app_name: continue provider_template = str(app.get("provider_template") or "").strip() if not provider_template: return jsonify({"success": False, "message": f"Dify应用 {app_name} 未绑定 Provider 模板"}), 400 if provider_template not in provider_names: return jsonify({"success": False, "message": f"Dify应用 {app_name} 绑定的 Provider 不存在"}), 400 app_key = str(app.get("app_key") or "").strip() if not app_key: return jsonify({"success": False, "message": f"Dify应用 {app_name} 缺少 app_key"}), 400 scene_names = set() for scene in scene_list: if not isinstance(scene, dict): continue scene_name = str(scene.get("name") or "").strip() target_type = str(scene.get("target_type") or "").strip().lower() target_ref = str(scene.get("target_ref") or "").strip() if not scene_name: continue if scene_name in scene_names: return jsonify({"success": False, "message": f"场景名重复: {scene_name}"}), 400 scene_names.add(scene_name) if target_type not in {"dify_app", "backend"}: return jsonify({"success": False, "message": f"场景 {scene_name} target_type 非法"}), 400 if not target_ref: return jsonify({"success": False, "message": f"场景 {scene_name} 未绑定目标"}), 400 if target_type == "dify_app" and target_ref not in dify_app_names: return jsonify({"success": False, "message": f"场景 {scene_name} 绑定的 dify_app 不存在"}), 400 if target_type == "backend" and target_ref not in backend_names: return jsonify({"success": False, "message": f"场景 {scene_name} 绑定的 backend 不存在"}), 400 if default_scene and default_scene not in scene_names: return jsonify({"success": False, "message": "默认场景不存在"}), 400 catalog = { "default_scene": default_scene, "providers": provider_list, "dify_apps": dify_app_list, "backends": backend_list, "scenes": scene_list, } _save_llm_catalog_runtime(catalog) if getattr(server, "robot", None) and getattr(server.robot, "config", None): server.robot.config.reload() # LLM 目录保存到 MySQL 后,需要主动失效运行时缓存,保证插件下一次调用直接走新目录。 server.robot.apply_runtime_config(reload_catalog=True) else: LLMRegistry.invalidate_cache() return jsonify({"success": True, "message": "全局 LLM 配置已保存并应用到运行时"}) except Exception as e: logger.error(f"保存全局 LLM 配置失败: {e}") return jsonify({"success": False, "message": str(e)}), 500 @system_bp.route('/api/system/md2img_health', methods=['GET']) @login_required def get_md2img_health(): """查询 Markdown 转图运行时健康状态。""" try: # 默认只读取状态,不主动拉起 runtime。 # 当后台希望“刷新并顺便拉起”时,可传 ensure_runtime=true。 ensure_runtime = str(request.args.get('ensure_runtime', 'false')).strip().lower() in {'1', 'true', 'yes', 'on'} data = get_md2img_health_snapshot(ensure_runtime=ensure_runtime) return jsonify({"success": True, "data": data}) except Exception as e: logger.error(f"获取 md2img 健康状态失败: {e}") return jsonify({"success": False, "message": str(e)}), 500 @system_bp.route('/api/system/md2img_warmup', methods=['POST']) @login_required def trigger_md2img_warmup(): """手动触发 Markdown 转图浏览器预热。""" try: payload = request.get_json(silent=True) or {} timeout_seconds = int(payload.get('timeout_seconds', 45) or 45) timeout_seconds = max(10, min(timeout_seconds, 180)) ok = warmup_md2img_browser_sync(timeout_seconds=timeout_seconds) data = get_md2img_health_snapshot(ensure_runtime=False) if ok: return jsonify({ "success": True, "message": f"预热完成(timeout={timeout_seconds}s)", "data": data, }) return jsonify({ "success": False, "message": f"预热失败(timeout={timeout_seconds}s),请查看运行日志", "data": data, }), 500 except Exception as e: logger.error(f"触发 md2img 预热失败: {e}") return jsonify({"success": False, "message": str(e)}), 500 @system_bp.route('/api/restart_service', methods=['POST']) @login_required def restart_service(): """调用项目根目录下的 restart.sh 重启服务""" try: project_root = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..', '..')) script_path = os.path.join(project_root, 'restart.sh') if not os.path.exists(script_path): return jsonify({"success": False, "message": f"未找到脚本: {script_path}"}), 404 subprocess.Popen( ['bash', script_path], cwd=project_root, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, start_new_session=True ) logger.warning(f"后台触发服务重启脚本: {script_path}") return jsonify({ "success": True, "message": "已触发重启脚本,服务将在短时间内重启" }) except Exception as e: logger.error(f"触发服务重启失败: {e}") return jsonify({"success": False, "message": str(e)}), 500