abot/admin/dashboard/blueprints/system.py

from flask import Blueprint, render_template, jsonify, request, send_from_directory, current_app, Response
from .auth import login_required
from loguru import logger
import os
import time
import subprocess
import socket
from datetime import datetime
import platform
import psutil
from collections import deque
import gzip
import json
import yaml
import toml
from utils.markdown_to_image import get_md2img_health_snapshot, warmup_md2img_browser_sync
from utils.ai.llm_registry import LLMRegistry
from base.plugin_common.plugin_interface import PluginStatus
from utils.ai.unified_llm import UnifiedLLMClient
from utils.decorator.async_job import async_job

# Create the blueprint for the system information pages.
system_bp = Blueprint('system', __name__)

# Record the application start time (captured when this module is imported).
APP_START_TIME = time.time()
# Most recent network counter sample, used by the resource monitor page to
# estimate upload/download rates. Sampling is deliberately "page-level and lightweight":
# 1. No extra daemon thread is started, so showing rates does not add a resident background task;
# 2. Rates are only computed when a user refreshes/polls the resource page, so the cost is near zero;
# 3. If the process restarts and this cache is lost, only the first rate reads 0; overall availability is unaffected.
NETWORK_IO_SAMPLE = {
    "timestamp": 0.0,
    "bytes_sent": 0,
    "bytes_recv": 0,
}

# The resource monitor page hides system mounts that contribute little to day-to-day capacity decisions:
# 1. `squashfs` is essentially the read-only images mounted by Ubuntu / Snap;
# 2. `/dev/loop*` are mostly image loopback devices; they look 100% full without the real disk being full;
# 3. Pseudo filesystems such as `/proc` / `/sys` / `/dev` describe kernel runtime state and do not belong on an operations home page.
IGNORED_DISK_FSTYPES = {
    "squashfs",
    "proc",
    "sysfs",
    "devtmpfs",
    "devfs",
    "securityfs",
    "cgroup",
    "cgroup2",
    "pstore",
    "autofs",
    "mqueue",
    "hugetlbfs",
    "debugfs",
    "tracefs",
    "configfs",
    "fusectl",
    "rpc_pipefs",
    "tmpfs",
}
# Mountpoints starting with any of these prefixes are hidden from the disk list.
IGNORED_DISK_MOUNTPOINT_PREFIXES = (
    "/snap/",
    "/proc",
    "/sys",
    "/dev",
    "/run/",
    "/var/lib/snapd/",
)
# Devices starting with any of these prefixes are hidden from the disk list.
IGNORED_DISK_DEVICE_PREFIXES = (
    "/dev/loop",
)


def _system_config_path() -> str:
    """Return the absolute path of ``config.yaml`` three directories above this file."""
    here = os.path.dirname(__file__)
    relative_path = os.path.join(here, os.pardir, os.pardir, os.pardir, 'config.yaml')
    return os.path.abspath(relative_path)


def _load_system_yaml() -> dict:
    """Read the system ``config.yaml`` and return its parsed content.

    Returns an empty dict when the file does not exist or parses to a falsy
    value (for example an empty document).
    """
    yaml_path = _system_config_path()
    if os.path.exists(yaml_path):
        with open(yaml_path, 'r', encoding='utf-8') as stream:
            parsed = yaml.safe_load(stream)
        return parsed or {}
    return {}


def _save_system_yaml(config_obj: dict) -> None:
    """Serialize ``config_obj`` as YAML and overwrite the system ``config.yaml``.

    Non-ASCII text is written as-is and the key order of ``config_obj`` is kept.
    """
    with open(_system_config_path(), 'w', encoding='utf-8') as stream:
        yaml.safe_dump(config_obj, stream=stream, allow_unicode=True, sort_keys=False)


def _safe_int(value, default: int = 0) -> int:
    """Safely convert a database / Redis value (often a numeric string) to an int.

    Args:
        value: Anything ``int()`` or ``float()`` understands, e.g. ``"12"``,
            ``"12.7"``, ``b"42"``, ``3.9``; ``None`` and ``""`` count as missing.
        default: Value returned when ``value`` is missing or not convertible.

    Returns:
        The value truncated toward zero, or ``default`` when it is missing,
        malformed, NaN or infinite.
    """
    try:
        if value in (None, ""):
            return default
        try:
            # Direct conversion first: keeps full precision for large integers
            # (a float round-trip loses precision above 2**53).
            return int(value)
        except (TypeError, ValueError):
            # Decimal-looking text such as "12.7" or "1e3" needs the float detour.
            return int(float(value))
    except (TypeError, ValueError, OverflowError):
        # OverflowError covers infinities ("inf", float("inf")) and huge floats.
        return default


def _safe_float(value, default: float = 0.0) -> float:
    """Safely convert a database / Redis value to a float.

    Args:
        value: Anything ``float()`` understands; ``None`` and ``""`` count as missing.
        default: Value returned when ``value`` is missing or not convertible.

    Returns:
        ``float(value)``, or ``default`` when the value is missing, malformed
        or too large to be represented as a float.
    """
    try:
        if value in (None, ""):
            return default
        return float(value)
    except (TypeError, ValueError, OverflowError):
        # OverflowError: integers too large for a float (e.g. 10**400).
        return default


def _format_bytes_to_mb(value: int) -> float:
    """Convert a byte count to megabytes, rounded to two decimals for summary display."""
    byte_count = _safe_float(value, 0.0)
    kilobytes = byte_count / 1024
    megabytes = kilobytes / 1024
    return round(megabytes, 2)


def _safe_divide(numerator: float, denominator: float, default: float = 0.0) -> float:
    """Divide without letting a zero denominator interrupt rate / ratio calculations.

    Returns ``default`` when ``denominator`` is falsy or the division raises.
    """
    try:
        return numerator / denominator if denominator else default
    except Exception:
        return default


def _primary_disk_path() -> str:
    """Return the most reliable primary disk path for the current system."""
    # The resource monitor page must work both on a local Windows dev box and on Linux servers:
    # 1. The filesystem root is preferred, which is "/" on Linux;
    # 2. On Windows this resolves to the root of the current drive;
    # 3. Hard-coding "/" is avoided so local debugging does not break.
    root_separator = os.sep
    return os.path.abspath(root_separator)


def _format_datetime_text(timestamp_value: float | int | None) -> str:
    """把时间戳格式化为后台页面可直接展示的文本。"""
    if not timestamp_value:
        return "-"
    try:
        return datetime.fromtimestamp(float(timestamp_value)).strftime("%Y-%m-%d %H:%M:%S")
    except Exception:
        return "-"


def _should_ignore_disk_partition(partition) -> bool:
    """Decide whether a mount should be hidden from the dashboard disk list.

    Args:
        partition: Object exposing ``mountpoint``, ``device`` and ``fstype``
            attributes; missing attributes are treated as empty strings.

    Returns:
        True when the filesystem type, mountpoint or device matches one of
        the module-level ignore rules.
    """
    mountpoint = str(getattr(partition, "mountpoint", "") or "").strip()
    device = str(getattr(partition, "device", "") or "").strip()
    fstype = str(getattr(partition, "fstype", "") or "").strip().lower()

    # Windows normally never matches these Linux pseudo-filesystem rules; this
    # only has to stay cross-platform safe.
    if fstype in IGNORED_DISK_FSTYPES:
        return True

    for prefix in IGNORED_DISK_MOUNTPOINT_PREFIXES:
        if prefix.endswith("/"):
            # Prefix already ends on a path boundary: plain prefix match.
            if mountpoint.startswith(prefix):
                return True
        elif mountpoint == prefix or mountpoint.startswith(prefix + "/"):
            # Bare prefixes such as "/dev" must match whole path components
            # only, otherwise real mounts like "/devdata" or "/sysbackup"
            # would be hidden by mistake.
            return True

    return device.startswith(tuple(IGNORED_DISK_DEVICE_PREFIXES))


def _sample_network_speed() -> dict:
    """Estimate upload/download rates from two consecutive page-level samples.

    Reads the current network counters, compares them with the sample kept in
    ``NETWORK_IO_SAMPLE`` and then stores the new sample there. Rates stay 0.0
    when no earlier sample exists; negative deltas are clamped to 0.0.
    """
    io_counters = psutil.net_io_counters()
    sampled_at = time.time()
    sent_now = _safe_int(getattr(io_counters, "bytes_sent", 0))
    recv_now = _safe_int(getattr(io_counters, "bytes_recv", 0))
    previous_at = _safe_float(NETWORK_IO_SAMPLE.get("timestamp"))
    window_seconds = max(sampled_at - previous_at, 0.0)

    up_rate = 0.0
    down_rate = 0.0
    if window_seconds > 0 and previous_at > 0:
        sent_delta = sent_now - _safe_int(NETWORK_IO_SAMPLE.get("bytes_sent"))
        recv_delta = recv_now - _safe_int(NETWORK_IO_SAMPLE.get("bytes_recv"))
        up_rate = max(_safe_divide(sent_delta, window_seconds, 0.0), 0.0)
        down_rate = max(_safe_divide(recv_delta, window_seconds, 0.0), 0.0)

    # Remember this sample so the next call can compute a delta against it.
    NETWORK_IO_SAMPLE.update(
        {
            "timestamp": sampled_at,
            "bytes_sent": sent_now,
            "bytes_recv": recv_now,
        }
    )

    return {
        "bytes_sent": sent_now,
        "bytes_recv": recv_now,
        "upload_speed_bps": round(up_rate, 2),
        "download_speed_bps": round(down_rate, 2),
    }


def _get_monitored_process():
    """Return a cached ``psutil.Process`` handle for the current process.

    ``Process.cpu_percent(interval=None)`` measures CPU usage since the
    previous call on the same ``Process`` object, and the first call on a new
    object returns a meaningless 0.0. Re-creating the handle on every request
    would therefore always report 0.0, so the handle is kept between calls
    (and rebuilt if the PID changes, e.g. after a fork).
    """
    handle = getattr(_get_monitored_process, "_handle", None)
    pid = os.getpid()
    if handle is None or handle.pid != pid:
        handle = psutil.Process(pid)
        # Prime the CPU baseline so later readings cover a real interval.
        handle.cpu_percent(interval=None)
        _get_monitored_process._handle = handle
    return handle


def _extract_server_runtime_snapshot() -> dict:
    """Build the lightweight server runtime snapshot used by the resource monitor page.

    Returns:
        A dict with the sections ``timestamp``, ``server``, ``cpu``,
        ``memory``, ``disk``, ``network``, ``process`` and ``infrastructure``.
    """
    # The snapshot intentionally covers only what is most useful for day-to-day observation:
    # 1. Host resources: CPU / memory / disk / network;
    # 2. Application process: whether the current ABOT process is alive and how much it consumes;
    # 3. Infrastructure: MySQL / Redis keep reusing the existing summary probes;
    # 4. No dependency on a glances process, which keeps deployment and operations lighter.
    server = current_app.dashboard_server
    # Reuse one Process handle across requests so the process CPU percentage
    # is measured over the interval since the previous snapshot.
    current_process = _get_monitored_process()
    virtual_memory = psutil.virtual_memory()
    swap_memory = psutil.swap_memory()
    cpu_usage = psutil.cpu_percent(interval=None)
    process_cpu_usage = current_process.cpu_percent(interval=None)
    boot_time = psutil.boot_time()
    network_sample = _sample_network_speed()
    disk_io = psutil.disk_io_counters()
    try:
        load_values = os.getloadavg()
    except (AttributeError, OSError):
        # os.getloadavg is unavailable on some platforms; fall back to zeros.
        load_values = (0.0, 0.0, 0.0)

    disk_items = []
    hidden_disk_items_count = 0
    seen_mountpoints = set()
    for partition in psutil.disk_partitions(all=False):
        mountpoint = str(getattr(partition, "mountpoint", "") or "").strip()
        if not mountpoint or mountpoint in seen_mountpoints:
            continue
        seen_mountpoints.add(mountpoint)
        if _should_ignore_disk_partition(partition):
            hidden_disk_items_count += 1
            continue
        try:
            usage = psutil.disk_usage(mountpoint)
        except Exception:
            # Best effort: a mount whose usage cannot be read is left out of the list.
            continue
        disk_items.append({
            "device": str(getattr(partition, "device", "") or "").strip() or mountpoint,
            "mountpoint": mountpoint,
            "fstype": str(getattr(partition, "fstype", "") or "").strip(),
            "total_bytes": _safe_int(getattr(usage, "total", 0)),
            "used_bytes": _safe_int(getattr(usage, "used", 0)),
            "free_bytes": _safe_int(getattr(usage, "free", 0)),
            "usage_percent": round(_safe_float(getattr(usage, "percent", 0.0)), 1),
        })
    # Fullest disks first; only the first 8 are returned below.
    disk_items.sort(key=lambda item: item.get("usage_percent", 0.0), reverse=True)

    primary_disk_usage = psutil.disk_usage(_primary_disk_path())
    process_memory = current_process.memory_info()
    try:
        open_files = len(current_process.open_files())
    except Exception:
        open_files = 0

    try:
        tcp_connections = current_process.connections(kind="inet")
        established_connections = sum(
            1 for conn in tcp_connections if str(getattr(conn, "status", "") or "").upper() == "ESTABLISHED"
        )
    except Exception:
        established_connections = 0

    return {
        "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
        "server": {
            "hostname": socket.gethostname(),
            "os": platform.system(),
            "os_version": platform.version(),
            "python_version": platform.python_version(),
            "boot_time": _format_datetime_text(boot_time),
            "uptime_seconds": round(max(time.time() - boot_time, 0), 2),
        },
        "cpu": {
            "usage_percent": round(cpu_usage, 1),
            "logical_count": psutil.cpu_count(logical=True) or 0,
            "physical_count": psutil.cpu_count(logical=False) or 0,
            "load_1": round(_safe_float(load_values[0]), 2),
            "load_5": round(_safe_float(load_values[1]), 2),
            "load_15": round(_safe_float(load_values[2]), 2),
        },
        "memory": {
            "usage_percent": round(_safe_float(getattr(virtual_memory, "percent", 0.0)), 1),
            "total_bytes": _safe_int(getattr(virtual_memory, "total", 0)),
            "used_bytes": _safe_int(getattr(virtual_memory, "used", 0)),
            "available_bytes": _safe_int(getattr(virtual_memory, "available", 0)),
            "swap_usage_percent": round(_safe_float(getattr(swap_memory, "percent", 0.0)), 1),
            "swap_total_bytes": _safe_int(getattr(swap_memory, "total", 0)),
            "swap_used_bytes": _safe_int(getattr(swap_memory, "used", 0)),
        },
        "disk": {
            "primary_usage_percent": round(_safe_float(getattr(primary_disk_usage, "percent", 0.0)), 1),
            "primary_total_bytes": _safe_int(getattr(primary_disk_usage, "total", 0)),
            "primary_used_bytes": _safe_int(getattr(primary_disk_usage, "used", 0)),
            "io_read_bytes": _safe_int(getattr(disk_io, "read_bytes", 0)) if disk_io else 0,
            "io_write_bytes": _safe_int(getattr(disk_io, "write_bytes", 0)) if disk_io else 0,
            "hidden_virtual_mount_count": hidden_disk_items_count,
            "items": disk_items[:8],
        },
        "network": {
            **network_sample,
            "established_connections": established_connections,
        },
        "process": {
            "pid": current_process.pid,
            "cpu_percent": round(process_cpu_usage, 1),
            "memory_percent": round(current_process.memory_percent(), 2),
            "memory_rss_bytes": _safe_int(getattr(process_memory, "rss", 0)),
            "thread_count": current_process.num_threads(),
            "open_files": open_files,
            "create_time": _format_datetime_text(current_process.create_time()),
            "uptime_seconds": round(max(time.time() - current_process.create_time(), 0), 2),
        },
        "infrastructure": {
            "mysql": _extract_mysql_runtime_snapshot(server.db_manager),
            "redis": _extract_redis_runtime_snapshot(server.db_manager),
        },
    }


def _extract_mysql_runtime_snapshot(db_manager) -> dict:
    """Collect a MySQL runtime summary for the dashboard.

    The dashboard is not meant to replace DBA tooling; it lets an
    administrator judge at a glance:
    1. whether the database is alive;
    2. whether connection pressure is high;
    3. whether the current schema has grown noticeably;
    4. whether a deeper look with specialised monitoring is warranted.

    Args:
        db_manager: Connection manager expected to provide
            ``get_mysql_connection()``; the optional helpers
            ``get_mysql_database_name`` and ``get_slow_query_threshold_ms``
            are used when present. ``None`` is tolerated.

    Returns:
        A snapshot dict. Any failure, including failing to obtain the
        connection, yields ``status == "danger"`` with the error text in
        ``summary`` rather than an exception.
    """
    snapshot = {
        "status": "healthy",
        "summary": "连接正常",
        # Do not assume db_manager implements the extended helpers: several
        # DBConnectionManager variants exist in this repository, so go through
        # getattr first and let the SQL query below fill in the real value.
        "database": (
            str(getattr(db_manager, "get_mysql_database_name", lambda: "")() or "").strip()
            if db_manager is not None else ""
        ),
        "version": "",
        "threads_connected": 0,
        "threads_running": 0,
        "max_connections": 0,
        "connection_usage_percent": 0.0,
        "questions_per_second": 0.0,
        "uptime_seconds": 0,
        "table_count": 0,
        "schema_size_mb": 0.0,
        "slow_query_threshold_ms": (
            int(getattr(db_manager, "get_slow_query_threshold_ms", lambda default=300: default)(300))
            if db_manager is not None else 300
        ),
    }

    mysql_conn = None
    try:
        # Acquire the connection inside the try block so an unreachable
        # database is reported as a "danger" snapshot instead of crashing the page.
        mysql_conn = db_manager.get_mysql_connection()
        with mysql_conn.cursor(dictionary=True) as cursor:
            # Liveness probe plus version detection:
            # 1. SELECT VERSION() is extremely cheap;
            # 2. unlike a bare SELECT 1 it also yields the version;
            # 3. showing the version helps spot a server running a different version.
            cursor.execute("SELECT VERSION() AS version, DATABASE() AS database_name")
            version_row = cursor.fetchone() or {}
            snapshot["version"] = str(version_row.get("version") or "").strip()
            snapshot["database"] = str(version_row.get("database_name") or snapshot["database"] or "").strip()

            cursor.execute(
                """
                SHOW GLOBAL STATUS
                WHERE Variable_name IN ('Threads_connected', 'Threads_running', 'Questions', 'Uptime')
                """
            )
            status_rows = cursor.fetchall() or []
            status_map = {
                str(row.get("Variable_name") or "").strip(): row.get("Value")
                for row in status_rows
            }

            cursor.execute(
                """
                SHOW GLOBAL VARIABLES
                WHERE Variable_name IN ('max_connections')
                """
            )
            variable_rows = cursor.fetchall() or []
            variable_map = {
                str(row.get("Variable_name") or "").strip(): row.get("Value")
                for row in variable_rows
            }

            # The information_schema aggregate is heavier than SELECT 1 but is
            # still a lightweight metadata query:
            # 1. the home page refreshes on a ~30 second cadence, so the cost is acceptable;
            # 2. it directly shows table count and size of the current schema;
            # 3. it helps judge whether a bloated message table is slowing the backend down.
            cursor.execute(
                """
                SELECT
                    COUNT(*) AS table_count,
                    COALESCE(SUM(data_length + index_length), 0) AS schema_size_bytes
                FROM information_schema.tables
                WHERE table_schema = DATABASE()
                """
            )
            schema_row = cursor.fetchone() or {}

        snapshot["threads_connected"] = _safe_int(status_map.get("Threads_connected"))
        snapshot["threads_running"] = _safe_int(status_map.get("Threads_running"))
        snapshot["max_connections"] = _safe_int(variable_map.get("max_connections"))
        snapshot["uptime_seconds"] = _safe_int(status_map.get("Uptime"))
        total_questions = _safe_int(status_map.get("Questions"))
        if snapshot["uptime_seconds"] > 0:
            # Average queries per second since server start.
            snapshot["questions_per_second"] = round(total_questions / snapshot["uptime_seconds"], 2)
        if snapshot["max_connections"] > 0:
            snapshot["connection_usage_percent"] = round(
                (snapshot["threads_connected"] / snapshot["max_connections"]) * 100,
                1,
            )
        snapshot["table_count"] = _safe_int(schema_row.get("table_count"))
        snapshot["schema_size_mb"] = _format_bytes_to_mb(schema_row.get("schema_size_bytes"))

        # Warn at >= 80% connection usage or >= 12 running threads.
        if snapshot["connection_usage_percent"] >= 80 or snapshot["threads_running"] >= 12:
            snapshot["status"] = "warning"
            snapshot["summary"] = (
                f"连接压力偏高：已连接 {snapshot['threads_connected']} / {snapshot['max_connections']}，"
                f"运行中线程 {snapshot['threads_running']}"
            )
        else:
            snapshot["summary"] = (
                f"连接正常：已连接 {snapshot['threads_connected']} / {snapshot['max_connections'] or '-'}，"
                f"QPS {snapshot['questions_per_second']}"
            )
        return snapshot
    except Exception as mysql_error:
        snapshot["status"] = "danger"
        snapshot["summary"] = f"MySQL 探测失败: {mysql_error}"
        return snapshot
    finally:
        # Only close a connection that was actually obtained.
        if mysql_conn is not None:
            mysql_conn.close()


def _extract_redis_runtime_snapshot(db_manager) -> dict:
    """Collect a Redis runtime summary for the dashboard.

    Pings Redis, reads ``INFO`` and the key count, and derives memory usage
    and cache hit rate. Any failure yields ``status == "danger"`` with the
    error text in ``summary`` instead of an exception.
    """
    connection_settings = getattr(db_manager, "redis_config", {}) or {}
    snapshot = {
        "status": "healthy",
        "summary": "连接正常",
        "db_index": _safe_int(connection_settings.get("db", 0)),
        "key_count": 0,
        "connected_clients": 0,
        "blocked_clients": 0,
        "ops_per_sec": 0,
        "used_memory_human": "",
        "used_memory_peak_human": "",
        "memory_usage_percent": 0.0,
        "uptime_seconds": 0,
        "hit_rate_percent": 0.0,
    }

    try:
        client = db_manager.get_redis_connection()
        client.ping()
        stats = client.info() or {}
        snapshot["key_count"] = _safe_int(client.dbsize())

        # Integer counters copied from INFO (snapshot field -> INFO key).
        integer_fields = (
            ("connected_clients", "connected_clients"),
            ("blocked_clients", "blocked_clients"),
            ("ops_per_sec", "instantaneous_ops_per_sec"),
        )
        for field_name, info_key in integer_fields:
            snapshot[field_name] = _safe_int(stats.get(info_key))
        for text_field in ("used_memory_human", "used_memory_peak_human"):
            snapshot[text_field] = str(stats.get(text_field) or "").strip()
        snapshot["uptime_seconds"] = _safe_int(stats.get("uptime_in_seconds"))

        # Memory usage is only meaningful when a maxmemory limit is reported.
        memory_limit = _safe_int(stats.get("maxmemory"))
        memory_used = _safe_int(stats.get("used_memory"))
        if memory_limit > 0:
            snapshot["memory_usage_percent"] = round((memory_used / memory_limit) * 100, 1)

        hits = _safe_int(stats.get("keyspace_hits"))
        misses = _safe_int(stats.get("keyspace_misses"))
        lookups = hits + misses
        if lookups > 0:
            snapshot["hit_rate_percent"] = round(
                (hits / lookups) * 100,
                1,
            )

        under_pressure = snapshot["blocked_clients"] > 0 or snapshot["memory_usage_percent"] >= 80
        if under_pressure:
            snapshot["status"] = "warning"
            snapshot["summary"] = (
                f"缓存压力需关注：keys {snapshot['key_count']}，"
                f"clients {snapshot['connected_clients']}，ops/s {snapshot['ops_per_sec']}"
            )
        else:
            snapshot["summary"] = (
                f"缓存正常：keys {snapshot['key_count']}，"
                f"clients {snapshot['connected_clients']}，ops/s {snapshot['ops_per_sec']}"
            )
        return snapshot
    except Exception as redis_error:
        snapshot["status"] = "danger"
        snapshot["summary"] = f"Redis 探测失败: {redis_error}"
        return snapshot


def _parse_snapshot_datetime(value: str | None) -> datetime | None:
    """把首页摘要里常用的时间字符串安全转换为 datetime。"""
    text = str(value or "").strip()
    if not text:
        return None
    try:
        return datetime.strptime(text, "%Y-%m-%d %H:%M:%S")
    except ValueError:
        return None


def _count_enabled_runtime_items(items) -> int:
    """Count the enabled entries of a dict (its values) or a list.

    Only dict entries are counted; an entry is enabled when it has no
    ``"enabled"`` key or that key is truthy. Other container types count as 0.
    """
    if isinstance(items, dict):
        candidates = items.values()
    elif isinstance(items, list):
        candidates = items
    else:
        candidates = ()
    return sum(
        1
        for entry in candidates
        if isinstance(entry, dict)
        and ("enabled" not in entry or bool(entry.get("enabled", True)))
    )


def _extract_llm_catalog_summary() -> dict:
    """Summarise the LLM routing configuration for the home page.

    Uses the catalog from ``LLMRegistry.get_catalog()`` when it is non-empty,
    otherwise the legacy config from ``LLMRegistry.get_llm_config()``. Any
    error is logged as a warning and an all-empty summary is returned.
    """
    try:
        catalog = LLMRegistry.get_catalog() or {}
        if not catalog:
            # Legacy layout: scenes / backends are plain dicts with no enabled flag.
            legacy = LLMRegistry.get_llm_config() or {}
            legacy_scenes = legacy.get("scenes", {}) or {}
            legacy_backends = legacy.get("backends", {}) or {}
            legacy_default_backend = str(legacy.get("default_backend") or "").strip()
            return {
                "provider_count": 0,
                "scene_count": len(legacy_scenes) if isinstance(legacy_scenes, dict) else 0,
                "target_count": len(legacy_backends) if isinstance(legacy_backends, dict) else 0,
                "default_scene": "",
                "default_backend": legacy_default_backend,
                "has_routing": bool(legacy_scenes) or bool(legacy_default_backend),
            }

        providers = catalog.get("providers", {}) or {}
        dify_apps = catalog.get("dify_apps", {}) or {}
        backends = catalog.get("backends", {}) or {}
        scenes = catalog.get("scenes", {}) or {}
        default_scene = str(catalog.get("default_scene") or "").strip()
        if default_scene:
            default_backend = str(LLMRegistry.get_scene_backend_name(default_scene) or "").strip()
        else:
            default_backend = ""

        enabled_scene_total = _count_enabled_runtime_items(scenes)
        # Targets are enabled backends plus enabled Dify apps.
        enabled_target_total = _count_enabled_runtime_items(backends) + _count_enabled_runtime_items(dify_apps)
        return {
            "provider_count": _count_enabled_runtime_items(providers),
            "scene_count": enabled_scene_total,
            "target_count": enabled_target_total,
            "default_scene": default_scene,
            "default_backend": default_backend,
            "has_routing": enabled_scene_total > 0,
        }
    except Exception as llm_catalog_error:
        logger.warning(f"提取 LLM 路由摘要失败: {llm_catalog_error}")
        return {
            "provider_count": 0,
            "scene_count": 0,
            "target_count": 0,
            "default_scene": "",
            "default_backend": "",
            "has_routing": False,
        }


def _extract_ai_runtime_snapshot() -> dict:
    """Build the LLM runtime summary shown on the home page.

    Merges the snapshot from ``UnifiedLLMClient.get_runtime_snapshot()`` with
    the routing catalog summary, then sets ``status`` and ``summary`` from the
    routing state and the recent call statistics.
    """
    runtime_snapshot = UnifiedLLMClient.get_runtime_snapshot() or {}
    last_call = dict(runtime_snapshot.get("last_call") or {})
    catalog_summary = _extract_llm_catalog_summary()

    total_calls = _safe_int(runtime_snapshot.get("total_calls"))
    failed_calls = _safe_int(runtime_snapshot.get("failed_calls"))
    success_rate = _safe_float(runtime_snapshot.get("success_rate"))
    avg_latency_ms = _safe_float(runtime_snapshot.get("avg_latency_ms"))
    last_error = str(runtime_snapshot.get("last_error") or "").strip()

    snapshot = {
        **runtime_snapshot,
        "last_call": last_call,
        "provider_count": catalog_summary.get("provider_count", 0),
        "scene_count": catalog_summary.get("scene_count", 0),
        "target_count": catalog_summary.get("target_count", 0),
        "default_scene": catalog_summary.get("default_scene", ""),
        "default_backend": catalog_summary.get("default_backend", ""),
        "has_routing": bool(catalog_summary.get("has_routing")),
        "last_provider": str(last_call.get("provider") or "").strip(),
        "last_backend": str(last_call.get("backend") or "").strip(),
        "last_scene": str(last_call.get("scene") or "").strip(),
        "last_model": str(last_call.get("model") or "").strip(),
        "last_timestamp": str(last_call.get("timestamp") or "").strip(),
        "last_latency_ms": _safe_float(last_call.get("latency_ms")),
        "last_error": last_error,
    }

    # Checked in priority order: no routing -> no calls -> all failed -> some failed -> healthy.
    if not snapshot["has_routing"]:
        status = "warning"
        summary = "当前未发现完整的 LLM 路由配置，建议先检查默认场景与后端绑定"
    elif total_calls <= 0:
        status = "warning"
        summary = (
            f"已配置 {snapshot['scene_count']} 个场景、{snapshot['target_count']} 个目标，"
            "最近窗口内暂无统一 LLM 调用记录"
        )
    elif failed_calls >= total_calls:
        # total_calls is known to be positive here, so every call failed.
        status = "danger"
        summary = (
            f"最近 {total_calls} 次调用全部失败，成功率 {success_rate:.2f}%，"
            f"平均耗时 {avg_latency_ms:.2f}ms"
        )
    elif failed_calls > 0 or last_error:
        status = "warning"
        summary = (
            f"最近 {total_calls} 次调用中失败 {failed_calls} 次，成功率 {success_rate:.2f}%，"
            f"平均耗时 {avg_latency_ms:.2f}ms"
        )
    else:
        status = "healthy"
        summary = (
            f"最近 {total_calls} 次调用全部成功，成功率 {success_rate:.2f}%，"
            f"平均耗时 {avg_latency_ms:.2f}ms"
        )

    snapshot["status"] = status
    snapshot["summary"] = summary
    return snapshot


def _extract_scheduler_runtime_snapshot() -> dict:
    """Aggregate ``async_job`` runtime rows into the home-page scheduler summary.

    Counts jobs by state and owner, finds the earliest upcoming run among
    enabled jobs and the most recent failure, then derives ``status`` and
    ``summary``.
    """
    job_rows = async_job.get_jobs_snapshot()

    def _status_of(job) -> str:
        """Return the job's last status, trimmed and lower-cased."""
        return str(job.get("last_status") or "").strip().lower()

    def _failure_recency(job):
        """Sort key for failures: updated_at, else last_run_at, else the minimum datetime."""
        return (
            _parse_snapshot_datetime(job.get("updated_at"))
            or _parse_snapshot_datetime(job.get("last_run_at"))
            or datetime.min
        )

    upcoming_runs = []
    failures = []
    system_job_count = 0
    plugin_job_count = 0
    for job in job_rows:
        key_text = str(job.get("job_key") or "").strip()
        owner = str(job.get("owner_name") or "system").strip().lower()
        scheduled_for = _parse_snapshot_datetime(job.get("next_run_at"))

        # A job is a system job only when it is owned by "system" and its key
        # does not carry the plugin schedule prefix.
        if owner == "system" and not key_text.startswith("plugin_schedule:"):
            system_job_count += 1
        else:
            plugin_job_count += 1

        if scheduled_for and bool(job.get("enabled")):
            upcoming_runs.append(scheduled_for)
        if _status_of(job) in {"failed", "invalid_schedule"}:
            failures.append(job)

    # Most recent failure; ties keep the earliest row in the original order.
    latest_failed_row = max(failures, key=_failure_recency, default={})

    statuses = [_status_of(job) for job in job_rows]
    invalid_jobs = statuses.count("invalid_schedule")
    never_run_jobs = statuses.count("never")
    total_jobs = len(job_rows)
    enabled_jobs = sum(1 for job in job_rows if bool(job.get("enabled")))
    running_jobs = sum(1 for job in job_rows if bool(job.get("running")))
    failed_jobs = len(failures)
    paused_jobs = total_jobs - enabled_jobs

    if upcoming_runs:
        next_run_at_text = min(upcoming_runs).strftime("%Y-%m-%d %H:%M:%S")
    else:
        next_run_at_text = ""

    # Keep the error preview short: at most 120 characters including the ellipsis.
    latest_failed_error = str(latest_failed_row.get("last_error") or "").strip()
    if len(latest_failed_error) > 120:
        latest_failed_error = f"{latest_failed_error[:117]}..."

    snapshot = {
        "status": "healthy",
        "summary": "任务调度运行正常",
        "total_jobs": total_jobs,
        "enabled_jobs": enabled_jobs,
        "running_jobs": running_jobs,
        "failed_jobs": failed_jobs,
        "invalid_jobs": invalid_jobs,
        "paused_jobs": paused_jobs,
        "never_run_jobs": never_run_jobs,
        "system_job_count": system_job_count,
        "plugin_job_count": plugin_job_count,
        "next_run_at": next_run_at_text,
        "latest_failed_job_name": str(latest_failed_row.get("name") or "").strip(),
        "latest_failed_error": latest_failed_error,
    }

    # Checked in priority order; the first matching condition decides status and summary.
    if total_jobs <= 0:
        snapshot["status"] = "warning"
        snapshot["summary"] = "当前没有加载任何定时任务"
    elif invalid_jobs > 0:
        snapshot["status"] = "danger"
        snapshot["summary"] = f"发现 {invalid_jobs} 个任务调度配置非法，建议立即检查任务页"
    elif failed_jobs > 0:
        snapshot["status"] = "warning"
        snapshot["summary"] = (
            f"最近有 {failed_jobs} 个任务执行失败，"
            f"下一次执行 {next_run_at_text or '暂未计算'}"
        )
    elif enabled_jobs <= 0:
        snapshot["status"] = "warning"
        snapshot["summary"] = "任务已加载，但当前没有启用中的调度任务"
    elif running_jobs > 0:
        snapshot["summary"] = (
            f"当前有 {running_jobs} 个任务执行中，"
            f"下一次执行 {next_run_at_text or '暂未计算'}"
        )
    else:
        snapshot["summary"] = f"已启用 {enabled_jobs} 个任务，下一次执行 {next_run_at_text or '暂未计算'}"
    return snapshot


def _legacy_llm_to_catalog(legacy_llm: dict) -> dict:
    """Convert the legacy ``llm`` (backends/scenes) structure into the catalog structure.

    Notes:
    1. The conversion writes nothing; the result is only built and returned
       (used as a display fallback when the catalog tables are unavailable);
    2. Dify backends are split into a shared provider plus one ``dify_app``
       per backend; every other backend is kept as a ``backend``.

    Malformed input is tolerated: a non-dict ``legacy_llm`` or ``backends``
    value is treated as empty, and a backend entry that is not a dict is
    treated as having no provider instead of raising.

    Args:
        legacy_llm: Legacy config with optional ``backends`` (name -> config),
            ``scenes`` (scene name -> backend name) and ``default_backend``.

    Returns:
        Dict with ``default_scene``, ``providers``, ``dify_apps``,
        ``backends`` and ``scenes``.
    """
    llm = legacy_llm if isinstance(legacy_llm, dict) else {}
    raw_backends = llm.get("backends")
    old_backends = raw_backends if isinstance(raw_backends, dict) else {}
    old_scenes = llm.get("scenes", {}) or {}
    default_backend = str(llm.get("default_backend") or "").strip()

    def _provider_of(backend_cfg) -> str:
        """Return the normalised provider of a backend config ('' if not a dict)."""
        if not isinstance(backend_cfg, dict):
            return ""
        return str(backend_cfg.get("provider") or "").strip().lower()

    def _target_type_for(backend_name: str) -> str:
        """Map a backend name to the catalog target type."""
        return "dify_app" if _provider_of(old_backends.get(backend_name)) == "dify" else "backend"

    providers = []
    dify_apps = []
    backends = []
    scenes = []

    # The first Dify backend acts as the template for the shared Dify provider.
    dify_template_cfg = next(
        (dict(cfg) for cfg in old_backends.values() if _provider_of(cfg) == "dify"),
        None,
    )
    if dify_template_cfg:
        providers.append(
            {
                "name": "dify_workflow_default",
                "provider_type": "dify",
                "enabled": True,
                "config": {
                    "provider": "dify",
                    "api_base_url": dify_template_cfg.get("api_base_url", ""),
                    "endpoint": dify_template_cfg.get("endpoint", "workflows/run"),
                    "mode": dify_template_cfg.get("mode", "workflow"),
                    "response_mode": dify_template_cfg.get("response_mode", "blocking"),
                    "request_timeout": dify_template_cfg.get("request_timeout", 60),
                    "max_retries": dify_template_cfg.get("max_retries", 3),
                    "retry_delay_seconds": dify_template_cfg.get("retry_delay_seconds", 1.0),
                },
            }
        )

    for backend_name, backend_cfg in old_backends.items():
        if not isinstance(backend_cfg, dict):
            continue
        if _provider_of(backend_cfg) == "dify":
            dify_apps.append(
                {
                    "name": str(backend_name),
                    "provider_template": "dify_workflow_default",
                    "app_key": str(backend_cfg.get("api_key") or "").strip(),
                    "workflow_output_key": str(backend_cfg.get("workflow_output_key") or "text").strip(),
                    "enabled": True,
                    "config": {
                        "endpoint": backend_cfg.get("endpoint", ""),
                        "mode": backend_cfg.get("mode", ""),
                        "response_mode": backend_cfg.get("response_mode", ""),
                        "request_timeout": backend_cfg.get("request_timeout", ""),
                    },
                }
            )
        else:
            backends.append(
                {
                    "name": str(backend_name),
                    "enabled": True,
                    "config": dict(backend_cfg),
                }
            )

    if isinstance(old_scenes, dict) and old_scenes:
        for scene_name, backend_name in old_scenes.items():
            scene_name = str(scene_name or "").strip()
            backend_name = str(backend_name or "").strip()
            if not scene_name or not backend_name:
                continue
            scenes.append(
                {
                    "name": scene_name,
                    "target_type": _target_type_for(backend_name),
                    "target_ref": backend_name,
                    "enabled": True,
                }
            )
    elif default_backend:
        # No scene mapping: expose the default backend as a single scene.
        scenes.append(
            {
                "name": "main.default",
                "target_type": _target_type_for(default_backend),
                "target_ref": default_backend,
                "enabled": True,
            }
        )

    default_scene = scenes[0]["name"] if scenes else ""
    return {
        "default_scene": default_scene,
        "providers": providers,
        "dify_apps": dify_apps,
        "backends": backends,
        "scenes": scenes,
    }


def _load_llm_catalog_runtime() -> dict:
    """Load the runtime LLM catalog, preferring the MySQL-backed store.

    Falls back to converting the legacy ``llm`` section of the YAML config
    when the store is missing, fails, or returns a catalog without scenes.
    """
    try:
        catalog_store = getattr(current_app.dashboard_server, "llm_catalog_db", None)
        if catalog_store:
            stored_catalog = catalog_store.get_catalog() or {}
            if stored_catalog and stored_catalog.get("scenes"):
                return stored_catalog
    except Exception as e:
        logger.warning(f"从 MySQL 读取 LLM 目录失败，回退 YAML: {e}")

    # Fallback: turn the legacy YAML ``llm`` section into the catalog structure for display.
    legacy_section = _load_system_yaml().get("llm", {}) or {}
    return _legacy_llm_to_catalog(legacy_section if isinstance(legacy_section, dict) else {})


def _save_llm_catalog_runtime(catalog: dict) -> None:
    """Persist the runtime LLM catalog through ``dashboard_server.llm_catalog_db``.

    Args:
        catalog: Catalog dict to store; a falsy value is saved as ``{}``.

    Raises:
        RuntimeError: If the catalog store is missing, or ``save_catalog``
            returns a falsy result.
    """
    catalog_store = getattr(current_app.dashboard_server, "llm_catalog_db", None)
    if not catalog_store:
        raise RuntimeError("llm_catalog_db 未初始化，无法保存 LLM 目录到 MySQL")
    if not catalog_store.save_catalog(catalog or {}):
        raise RuntimeError("保存 LLM 目录到 MySQL 失败")


def _plugins_root_path() -> str:
    """Return the absolute path of ``plugins``, three levels above this file's directory."""
    module_dir = os.path.dirname(__file__)
    relative_parts = ('..', '..', '..', 'plugins')
    return os.path.abspath(os.path.join(module_dir, *relative_parts))


def _scan_plugin_llm_usage() -> list:
    """Scan each plugin's ``config.toml`` and list the LLM scenes it references.

    Notes:
    1. The scan is read-only; plugin configuration files are never rewritten.
    2. Only ``scene`` keys are collected: from every top-level table, from
       tables nested one level below a top-level table, and from the document
       root (reported under the section name ``__root__``).
    3. Plugins whose ``config.toml`` cannot be parsed are logged and skipped.

    Returns:
        A list of ``{"plugin", "section", "scene"}`` dicts, de-duplicated on
        that triple and sorted by ``(plugin, section)``. Empty when the plugins
        directory does not exist.
    """
    plugins_root = _plugins_root_path()
    if not os.path.isdir(plugins_root):
        return []

    def _candidate_tables(config: dict):
        """Yield ``(section_label, table)`` pairs in scan order."""
        for top_name, top_value in config.items():
            if not isinstance(top_value, dict):
                continue
            # Top-level tables such as [Dify] / [api].
            yield str(top_name), top_value
            # One level of nesting, e.g. [Douyu.report_api].
            for child_name, child_value in top_value.items():
                if isinstance(child_value, dict):
                    yield f"{top_name}.{child_name}", child_value
        # Last resort: a ``scene`` written directly at the document root.
        yield "__root__", config if isinstance(config, dict) else {}

    # Keyed by plugin/section/scene so repeated hits collapse into one row.
    deduped = {}
    for plugin_name in sorted(os.listdir(plugins_root)):
        plugin_dir = os.path.join(plugins_root, plugin_name)
        config_path = os.path.join(plugin_dir, "config.toml")
        if not (os.path.isdir(plugin_dir) and os.path.exists(config_path)):
            continue
        try:
            plugin_config = toml.load(config_path) or {}
        except Exception as exc:
            logger.warning(f"扫描插件 LLM 依赖失败: plugin={plugin_name}, path={config_path}, error={exc}")
            continue

        for section_label, table in _candidate_tables(plugin_config):
            scene_name = str(table.get("scene") or "").strip()
            if not scene_name:
                continue
            deduped[f"{plugin_name}::{section_label}::{scene_name}"] = {
                "plugin": plugin_name,
                "section": section_label,
                "scene": scene_name,
            }

    rows = list(deduped.values())
    rows.sort(key=lambda row: (row["plugin"], row["section"]))
    return rows


def _build_llm_topology() -> dict:
    """Build the LLM dependency view shown on the admin page.

    Joins the plugin scene references from ``_scan_plugin_llm_usage`` with the
    runtime catalog, producing one row per reference that records the scene's
    target, the resolved provider, and whether the scene and its target exist.

    Returns:
        A dict with the catalog's ``default_scene``, ``providers``,
        ``dify_apps``, ``backends`` and ``scenes``, plus ``plugin_usages`` and
        ``topology_rows``.
    """
    catalog = _load_llm_catalog_runtime()

    def _index_by_name(field: str) -> dict:
        """Map stripped ``name`` -> entry for one catalog list."""
        return {
            str(entry.get("name") or "").strip(): entry
            for entry in (catalog.get(field, []) or [])
        }

    providers = _index_by_name("providers")
    dify_apps = _index_by_name("dify_apps")
    backends = _index_by_name("backends")
    scenes = _index_by_name("scenes")

    plugin_usages = _scan_plugin_llm_usage()
    topology_rows = []
    for usage in plugin_usages:
        scene_name = str(usage.get("scene") or "").strip()
        scene_cfg = scenes.get(scene_name, {}) or {}
        target_type = str(scene_cfg.get("target_type") or "").strip().lower()
        target_ref = str(scene_cfg.get("target_ref") or "").strip()

        provider_label = ""
        target_exists = False
        if target_type == "dify_app":
            # A Dify app is valid only when its provider template also exists.
            app_cfg = dify_apps.get(target_ref, {}) or {}
            template_name = str(app_cfg.get("provider_template") or "").strip()
            template_cfg = providers.get(template_name, {}) or {}
            provider_label = str(template_cfg.get("provider_type") or "").strip()
            target_exists = bool(app_cfg and template_cfg)
        elif target_type == "backend":
            backend_entry = backends.get(target_ref, {}) or {}
            backend_cfg = (backend_entry.get("config") or {}) if isinstance(backend_entry, dict) else {}
            provider_label = str((backend_cfg or {}).get("provider") or "").strip()
            target_exists = bool(backend_entry)

        topology_rows.append({
            "plugin": usage.get("plugin", ""),
            "section": usage.get("section", ""),
            "scene": scene_name,
            # "-" marks values that could not be resolved.
            "target_type": target_type or "-",
            "target_ref": target_ref or "-",
            "provider": provider_label or "-",
            "valid_scene": scene_name in scenes,
            "valid_target": target_exists,
        })

    return {
        "default_scene": str(catalog.get("default_scene") or "").strip(),
        "providers": catalog.get("providers", []) or [],
        "dify_apps": catalog.get("dify_apps", []) or [],
        "backends": catalog.get("backends", []) or [],
        "scenes": catalog.get("scenes", []) or [],
        "plugin_usages": plugin_usages,
        "topology_rows": topology_rows,
    }


@system_bp.route('/api_docs')
@login_required
def api_docs():
    """Render ``api_docs.html`` with the URL it should display as ``src_url``.

    The ``src`` query parameter wins when present. Otherwise the robot's
    ``ipad_config["server_url"]`` is used, falling back to
    ``http://127.0.0.1:8059/`` when it is missing or cannot be read.
    """
    fallback_url = "http://127.0.0.1:8059/"
    requested_src = request.args.get('src')
    if requested_src:
        # NOTE(review): the query value is handed to the template unvalidated;
        # confirm the template treats it safely.
        return render_template('api_docs.html', src_url=requested_src)

    try:
        ipad_cfg = getattr(current_app.dashboard_server.robot, "ipad_config", {}) or {}
        resolved_src = ipad_cfg.get("server_url", fallback_url)
    except Exception:
        resolved_src = fallback_url
    return render_template('api_docs.html', src_url=resolved_src)


@system_bp.route('/system_status')
@login_required
def system_status():
    """Render the resource-monitoring page (``system_status.html``).

    The page is a lightweight panel built into this project:
    1. it no longer depends on a separate glances process;
    2. it only consumes this service's own APIs;
    3. production deployments need not expose an extra port such as 61208.
    """
    page = render_template('system_status.html')
    return page


@system_bp.route('/system_llm')
@login_required
def system_llm():
    """Render the LLM configuration page (``system_llm.html``)."""
    page = render_template('system_llm.html')
    return page


# 页面路由
@system_bp.route('/wx_logs')
@login_required
def wx_logs():
    """Render the WeChat log viewer page (``wx_logs.html``)."""
    page = render_template('wx_logs.html')
    return page


# API路由
@system_bp.route('/api/system_info')
@login_required
def api_system_info():
    """Return basic host and process metrics as JSON.

    On success the payload is ``{"success": True, "data": {...}}``; any
    failure is logged and reported as ``{"success": False, "error": ...}``
    with HTTP 500.
    """
    try:
        system_info = {}
        system_info["os"] = platform.system()
        system_info["os_version"] = platform.version()
        system_info["python_version"] = platform.python_version()
        system_info["cpu_usage"] = psutil.cpu_percent()
        system_info["memory_usage"] = psutil.virtual_memory().percent
        system_info["disk_usage"] = psutil.disk_usage('/').percent
        # Uptime is measured from when this module was imported, not host boot.
        system_info["uptime"] = time.time() - APP_START_TIME
        system_info["timestamp"] = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        own_process = psutil.Process(os.getpid())
        system_info["open_files"] = len(own_process.open_files())
    except Exception as exc:
        logger.error(f"获取系统信息失败: {exc}")
        return jsonify({"success": False, "error": str(exc)}), 500
    else:
        try:
            return jsonify({"success": True, "data": system_info})
        except Exception as exc:
            logger.error(f"获取系统信息失败: {exc}")
            return jsonify({"success": False, "error": str(exc)}), 500


@system_bp.route('/api/system_status_overview')
@login_required
def api_system_status_overview():
    """Return the server snapshot consumed by the resource-monitoring page.

    Any failure is logged and reported as ``{"success": False, "error": ...}``
    with HTTP 500.
    """
    try:
        snapshot = _extract_server_runtime_snapshot()
        return jsonify({"success": True, "data": snapshot})
    except Exception as exc:
        logger.error(f"获取资源监控快照失败: {exc}")
        return jsonify({"success": False, "error": str(exc)}), 500


@system_bp.route('/api/system_health_summary')
@login_required
def api_system_health_summary():
    """Aggregate the key health signals shown on the dashboard home page.

    The JSON payload contains one section each for the robot, plugins, recent
    errors, infrastructure (MySQL and Redis snapshots), AI runtime and
    scheduler. Each of the first four carries a ``status`` and a ready-made
    ``summary`` string so the frontend does not re-implement the rules. Any
    failure is logged and reported as ``{"success": False, "error": ...}``
    with HTTP 500.
    """
    try:
        server = current_app.dashboard_server
        robot = getattr(server, "robot", None)
        plugin_manager = getattr(server, "plugin_manager", None)
        plugin_map = getattr(plugin_manager, "plugins", {}) or {}

        # Count plugins per state: how many are loaded, actually running, or failing.
        plugin_status_counter = {
            "total": len(plugin_map),
            "running": 0,
            "loaded": 0,
            "stopped": 0,
            "error": 0,
            "unloaded": 0,
            "unknown": 0,
        }
        # PluginStatus member names, compared in this order; the counter key is
        # the lower-cased member name. Anything else is counted as "unknown".
        known_states = ("RUNNING", "LOADED", "STOPPED", "ERROR", "UNLOADED")
        for plugin in plugin_map.values():
            status = getattr(plugin, "status", None)
            bucket = next(
                (state.lower() for state in known_states if status == getattr(PluginStatus, state)),
                "unknown",
            )
            plugin_status_counter[bucket] += 1

        # Reuse the existing stats store for the error count rather than adding
        # dedicated SQL for this card.
        _, recent_error_count = server.stats_db.get_error_logs(days=1, page=1, limit=1)

        # Infrastructure health is probed at summary level only: connectivity
        # plus a few load indicators, not a full inspection.
        mysql_snapshot = _extract_mysql_runtime_snapshot(server.db_manager)
        redis_snapshot = _extract_redis_runtime_snapshot(server.db_manager)

        # Robot section.
        robot_running = bool(getattr(robot, "ipad_running", False))
        robot_nickname = str(getattr(robot, "nickname", "") or "").strip()
        robot_wxid = str(getattr(robot, "wxid", "") or "").strip()
        robot_summary = "已连接并正在处理消息" if robot_running else "未连接或主循环未运行"
        robot_label = robot_nickname or robot_wxid
        if robot_label:
            robot_summary = f"{robot_summary} · {robot_label}"

        # Plugin section: any failing plugin, or none running at all, is a warning.
        error_total = plugin_status_counter["error"]
        running_total = plugin_status_counter["running"]
        plugin_total = plugin_status_counter["total"]
        if error_total > 0:
            plugin_status = "warning"
            plugin_summary = f"异常 {error_total} 个，运行中 {running_total} / {plugin_total}"
        elif running_total == 0 and plugin_total > 0:
            plugin_status = "warning"
            plugin_summary = f"暂无运行中插件，共加载 {plugin_total} 个"
        else:
            plugin_status = "healthy"
            plugin_summary = f"运行中 {running_total} / {plugin_total}"

        # Error section.
        if recent_error_count > 0:
            error_status = "warning"
            error_summary = f"近 24 小时记录到 {recent_error_count} 条异常"
        else:
            error_status = "healthy"
            error_summary = "近 24 小时未记录到异常"

        # AI card: runtime state plus routing summary, observed passively.
        ai_runtime = _extract_ai_runtime_snapshot()

        # Scheduler summary for the home page's right-hand column.
        scheduler_runtime = _extract_scheduler_runtime_snapshot()

        # Infrastructure roll-up: the worst of the two statuses wins.
        mysql_state = mysql_snapshot.get("status")
        redis_state = redis_snapshot.get("status")
        infra_states = {mysql_state, redis_state}
        if "danger" in infra_states:
            infra_status = "danger"
        elif "warning" in infra_states:
            infra_status = "warning"
        else:
            infra_status = "healthy"

        if mysql_state == "healthy" and redis_state == "healthy":
            infra_summary = "MySQL / Redis 均正常"
        elif mysql_state != "danger" and redis_state != "danger":
            infra_summary = "基础设施连接正常，但部分负载指标需要关注"
        else:
            infra_summary = "存在基础设施连接异常"

        summary_data = {
            "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
            "robot": {
                "status": "healthy" if robot_running else "danger",
                "running": robot_running,
                "nickname": robot_nickname,
                "wxid": robot_wxid,
                "summary": robot_summary,
            },
            "plugins": {
                "status": plugin_status,
                "summary": plugin_summary,
                **plugin_status_counter,
            },
            "errors": {
                "status": error_status,
                "recent_24h_count": recent_error_count,
                "summary": error_summary,
            },
            "infrastructure": {
                "status": infra_status,
                "summary": infra_summary,
                "mysql": mysql_snapshot,
                "redis": redis_snapshot,
            },
            "ai_runtime": {**ai_runtime},
            "scheduler": {**scheduler_runtime},
        }
        return jsonify({"success": True, "data": summary_data})
    except Exception as exc:
        logger.error(f"获取系统健康摘要失败: {exc}")
        return jsonify({"success": False, "error": str(exc)}), 500


def _tail_log_lines(log_file: str, lines: int, chunk_size: int = 8192) -> list:
    """Return the last ``lines`` lines of ``log_file`` without reading it all.

    The file is read backwards in ``chunk_size`` blocks until more than
    ``lines`` newlines have been seen (or the start of the file is reached),
    so the oldest returned line is never a partial one.

    Args:
        log_file: Path of the file to read (opened in binary mode).
        lines: Number of trailing lines wanted; non-positive yields ``[]``.
        chunk_size: Size in bytes of each backwards read.

    Returns:
        The trailing lines decoded as UTF-8 (undecodable bytes dropped),
        without line terminators.
    """
    if lines <= 0:
        return []

    chunks = []
    newline_count = 0
    with open(log_file, 'rb') as f:
        f.seek(0, os.SEEK_END)
        pos = f.tell()
        while pos > 0 and newline_count <= lines:
            read_size = min(chunk_size, pos)
            pos -= read_size
            f.seek(pos)
            chunk = f.read(read_size)
            chunks.append(chunk)
            # Count per chunk instead of re-scanning the whole buffer each time.
            newline_count += chunk.count(b'\n')

    # Chunks were gathered from the end backwards; restore file order once.
    buffer = b"".join(reversed(chunks))
    return [raw.decode('utf-8', errors='ignore') for raw in buffer.splitlines()[-lines:]]


@system_bp.route('/api/wx_logs')
@login_required
def api_wx_logs():
    """Return the tail of one of the WeChat log files as JSON.

    Query parameters:
        type: ``error`` or ``debug``; anything else selects the info log.
        lines: Number of trailing lines to return (default 100). A
            non-positive value returns no lines.

    The response is gzip-compressed when the client advertises gzip in
    ``Accept-Encoding``. A missing log file is not an error: it yields an
    empty ``content`` list and a warning in the server log. Any other failure
    is reported as ``{"success": False, "error": ...}`` with HTTP 500.
    """
    try:
        log_type = request.args.get('type', 'info')  # info log by default
        lines = request.args.get('lines', 100, type=int)  # last 100 lines by default

        # Log files live in the ``logs`` directory three levels above this package.
        logs_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..', '..', 'logs'))

        if log_type == 'error':
            log_file = os.path.join(logs_dir, 'wx_error.log')
        elif log_type == 'debug':
            log_file = os.path.join(logs_dir, 'wx_debug.log')
        else:
            log_file = os.path.join(logs_dir, 'wx_info.log')

        log_content = []
        if not os.path.exists(log_file):
            logger.warning(f"日志文件不存在: {log_file}")
            # List the available log files to help diagnose a wrong path.
            try:
                all_files = [f for f in os.listdir(logs_dir) if f.endswith('.log')]
                logger.info(f"项目根目录下的日志文件: {all_files}")
            except Exception as e:
                logger.error(f"列出目录文件失败: {e}")
        elif lines > 0:
            try:
                log_content = _tail_log_lines(log_file, lines)
            except Exception as e:
                logger.error(f"高效读取日志失败，回退到常规方式: {e}")
                with open(log_file, 'r', encoding='utf-8', errors='ignore') as f:
                    # Strip terminators so both paths return the same line format.
                    log_content = [line.rstrip('\r\n') for line in deque(f, lines)]

        payload = {
            "success": True,
            "data": {
                "log_type": log_type,
                "log_file": log_file,
                "content": log_content,
                "lines": len(log_content)
            }
        }
        accept = request.headers.get('Accept-Encoding', '')
        if 'gzip' in accept.lower():
            body = json.dumps(payload, ensure_ascii=False).encode('utf-8')
            resp = Response(gzip.compress(body, compresslevel=6), mimetype='application/json')
            resp.headers['Content-Encoding'] = 'gzip'
        else:
            resp = jsonify(payload)
        # The representation depends on Accept-Encoding; tell caches so.
        resp.headers['Vary'] = 'Accept-Encoding'
        return resp
    except Exception as e:
        logger.error(f"获取微信日志失败: {e}")
        return jsonify({"success": False, "error": str(e)}), 500


# API route: information about the currently logged-in WeChat user.
@system_bp.route('/api/current_user_info', methods=['GET'])
@login_required
def get_current_user_info():
    """Return the currently logged-in WeChat user's info as JSON.

    The payload is whatever ``dashboard_server.get_current_user_info()``
    returns; exceptions from that call are not handled here.
    """
    return jsonify(current_app.dashboard_server.get_current_user_info())


@system_bp.route('/api/system/config/raw', methods=['GET'])
@login_required
def get_system_config_raw():
    """Return the raw text of the system config file plus known LLM targets.

    ``llm_backends`` lists the runtime catalog's backend names and its Dify
    apps (prefixed with ``dify_app::``), sorted, to help debug scene bindings.
    Any failure is reported as ``{"success": False, "message": ...}`` with
    HTTP 500.
    """
    try:
        config_path = _system_config_path()
        with open(config_path, 'r', encoding='utf-8') as f:
            config_text = f.read()

        catalog = _load_llm_catalog_runtime()
        target_names = []
        for backend in (catalog.get("backends", []) or []):
            target_names.append(str(backend.get("name") or "").strip())
        for app in (catalog.get("dify_apps", []) or []):
            target_names.append(f"dify_app::{str(app.get('name') or '').strip()}")

        response_body = {
            "success": True,
            "data": config_text,
            "path": config_path,
            # Nameless backends produce empty strings and are dropped here.
            "llm_backends": sorted(filter(None, target_names)),
        }
        return jsonify(response_body)
    except Exception as exc:
        logger.error(f"读取系统配置失败: {exc}")
        return jsonify({"success": False, "message": str(exc)}), 500


@system_bp.route('/api/system/config/update', methods=['POST'])
@login_required
def update_system_config():
    """Validate and save the system YAML config, then apply it at runtime.

    Expects a JSON object with ``config_text`` holding the full YAML text.
    The text is rejected with HTTP 400, and nothing is written, when it is
    missing, is not a string, is not valid YAML, or parses to something other
    than a mapping (an empty document is still accepted). After a successful
    write the robot's config is reloaded and runtime settings re-applied; when
    no robot is available only the LLM registry cache is invalidated.

    Any other failure is reported as ``{"success": False, "message": ...}``
    with HTTP 500.
    """
    try:
        server = current_app.dashboard_server
        data = request.get_json() or {}
        if not isinstance(data, dict):
            return jsonify({"success": False, "message": "请求体必须是 JSON 对象"}), 400
        config_text = data.get("config_text")
        if config_text is None:
            return jsonify({"success": False, "message": "缺少配置内容"}), 400
        if not isinstance(config_text, str):
            return jsonify({"success": False, "message": "配置内容必须是字符串"}), 400

        # Validate before touching the file so a bad payload can never
        # overwrite a working configuration.
        try:
            parsed = yaml.safe_load(config_text)
        except yaml.YAMLError as e:
            return jsonify({"success": False, "message": f"YAML 格式错误: {e}"}), 400
        # Config readers call ``.get()`` on the loaded document, so a top-level
        # list or scalar would break them. ``None`` (empty document) is allowed.
        if parsed is not None and not isinstance(parsed, dict):
            return jsonify({"success": False, "message": "配置顶层必须是键值映射"}), 400

        config_path = _system_config_path()
        with open(config_path, 'w', encoding='utf-8') as f:
            f.write(config_text)

        if getattr(server, "robot", None) and getattr(server.robot, "config", None):
            server.robot.config.reload()
            # Sync runtime objects right after saving so no process restart is
            # needed to pick up the new values.
            server.robot.apply_runtime_config(reload_catalog=True)
        else:
            # Without a usable robot instance, at least drop the LLM routing
            # cache so later requests do not read stale values.
            LLMRegistry.invalidate_cache()

        return jsonify({"success": True, "message": "全局配置已保存并应用到运行时"})
    except Exception as e:
        logger.error(f"保存系统配置失败: {e}")
        return jsonify({"success": False, "message": str(e)}), 500


@system_bp.route('/api/system/llm_config', methods=['GET'])
@login_required
def get_system_llm_config():
    """Return the runtime LLM catalog and plugin dependency topology as JSON.

    Providers, Dify apps, backends and scenes are each sorted by ``name``.
    Any failure is reported as ``{"success": False, "message": ...}`` with
    HTTP 500.
    """
    try:
        catalog = _load_llm_catalog_runtime()

        def _sorted_by_name(field):
            """Return one catalog list ordered by the entries' ``name``."""
            entries = catalog.get(field, []) or []
            return sorted(entries, key=lambda entry: str(entry.get("name") or ""))

        response_data = {
            "default_scene": catalog.get("default_scene", ""),
            "providers": _sorted_by_name("providers"),
            "dify_apps": _sorted_by_name("dify_apps"),
            "backends": _sorted_by_name("backends"),
            "scenes": _sorted_by_name("scenes"),
        }

        topology = _build_llm_topology()
        response_data["topology_rows"] = topology.get("topology_rows", [])
        response_data["plugin_usages"] = topology.get("plugin_usages", [])
        # Describes where the catalog is stored (database tables, YAML fallback).
        response_data["config_path"] = (
            "mysql:t_llm_provider_templates + t_llm_dify_apps + "
            "t_llm_backends + t_llm_scenes (fallback yaml)"
        )
        return jsonify({"success": True, "data": response_data})
    except Exception as exc:
        logger.error(f"读取全局 LLM 配置失败: {exc}")
        return jsonify({"success": False, "message": str(exc)}), 500


@system_bp.route('/api/system/llm_config', methods=['POST'])
@login_required
def update_system_llm_config():
    """Validate and save the LLM catalog, then apply it at runtime.

    The JSON body may carry ``default_scene`` plus the lists ``providers``,
    ``dify_apps``, ``backends`` and ``scenes``. Validation, answered with
    HTTP 400 on the first failure:
    1. each of the four fields must be a list;
    2. every named Dify app needs an existing ``provider_template`` and a
       non-empty ``app_key``;
    3. scene names must be unique, and each named scene must target an
       existing ``dify_app`` or ``backend``;
    4. a non-empty ``default_scene`` must be one of the scene names.

    Entries that are not dicts, or have no name, are skipped by validation.
    Any other failure is reported as ``{"success": False, "message": ...}``
    with HTTP 500.
    """
    try:
        server = current_app.dashboard_server
        data = request.get_json() or {}
        default_scene = str(data.get("default_scene") or "").strip()

        def _reject(message):
            """Build the HTTP 400 response for a validation failure."""
            return jsonify({"success": False, "message": message}), 400

        def _names_of(entries):
            """Collect the non-empty, stripped names of the dict entries."""
            stripped = (
                str(entry.get("name") or "").strip()
                for entry in entries
                if isinstance(entry, dict)
            )
            return {name for name in stripped if name}

        catalog = {"default_scene": default_scene}
        for field in ("providers", "dify_apps", "backends", "scenes"):
            catalog[field] = data.get(field, []) or []
        for field in ("providers", "dify_apps", "backends", "scenes"):
            if not isinstance(catalog[field], list):
                return _reject(f"{field} 格式不正确")

        # Catalog-level validation: gather names first so references can be checked.
        provider_names = _names_of(catalog["providers"])
        targets_by_type = {
            "dify_app": _names_of(catalog["dify_apps"]),
            "backend": _names_of(catalog["backends"]),
        }

        for app in catalog["dify_apps"]:
            if not isinstance(app, dict):
                continue
            app_name = str(app.get("name") or "").strip()
            if not app_name:
                continue
            provider_template = str(app.get("provider_template") or "").strip()
            if not provider_template:
                return _reject(f"Dify应用 {app_name} 未绑定 Provider 模板")
            if provider_template not in provider_names:
                return _reject(f"Dify应用 {app_name} 绑定的 Provider 不存在")
            if not str(app.get("app_key") or "").strip():
                return _reject(f"Dify应用 {app_name} 缺少 app_key")

        seen_scenes = set()
        for scene in catalog["scenes"]:
            if not isinstance(scene, dict):
                continue
            scene_name = str(scene.get("name") or "").strip()
            if not scene_name:
                continue
            target_type = str(scene.get("target_type") or "").strip().lower()
            target_ref = str(scene.get("target_ref") or "").strip()
            if scene_name in seen_scenes:
                return _reject(f"场景名重复: {scene_name}")
            seen_scenes.add(scene_name)
            if target_type not in targets_by_type:
                return _reject(f"场景 {scene_name} target_type 非法")
            if not target_ref:
                return _reject(f"场景 {scene_name} 未绑定目标")
            if target_ref not in targets_by_type[target_type]:
                return _reject(f"场景 {scene_name} 绑定的 {target_type} 不存在")

        if default_scene and default_scene not in seen_scenes:
            return _reject("默认场景不存在")

        _save_llm_catalog_runtime(catalog)

        robot = getattr(server, "robot", None)
        if robot and getattr(robot, "config", None):
            robot.config.reload()
            # After saving, refresh runtime state so the next plugin call uses
            # the new catalog.
            robot.apply_runtime_config(reload_catalog=True)
        else:
            LLMRegistry.invalidate_cache()

        return jsonify({"success": True, "message": "全局 LLM 配置已保存并应用到运行时"})
    except Exception as exc:
        logger.error(f"保存全局 LLM 配置失败: {exc}")
        return jsonify({"success": False, "message": str(exc)}), 500


@system_bp.route('/api/system/md2img_health', methods=['GET'])
@login_required
def get_md2img_health():
    """Return the health snapshot of the Markdown-to-image runtime.

    The ``ensure_runtime`` query flag is forwarded to
    ``get_md2img_health_snapshot``. It defaults to false, so by default the
    state is only read; pass ``1``/``true``/``yes``/``on`` (case-insensitive)
    to request that the runtime be brought up as well.
    """
    truthy_values = {'1', 'true', 'yes', 'on'}
    try:
        raw_flag = str(request.args.get('ensure_runtime', 'false'))
        ensure_runtime = raw_flag.strip().lower() in truthy_values
        snapshot = get_md2img_health_snapshot(ensure_runtime=ensure_runtime)
        return jsonify({"success": True, "data": snapshot})
    except Exception as exc:
        logger.error(f"获取 md2img 健康状态失败: {exc}")
        return jsonify({"success": False, "message": str(exc)}), 500


@system_bp.route('/api/system/md2img_warmup', methods=['POST'])
@login_required
def trigger_md2img_warmup():
    """Manually trigger a warm-up of the Markdown-to-image browser.

    The optional JSON field ``timeout_seconds`` (default 45) is clamped to the
    range 10..180. The response always includes a fresh health snapshot; a
    failed warm-up, like any exception, is answered with HTTP 500.
    """
    try:
        payload = request.get_json(silent=True) or {}
        requested_timeout = int(payload.get('timeout_seconds', 45) or 45)
        timeout_seconds = min(max(requested_timeout, 10), 180)

        warmed_up = warmup_md2img_browser_sync(timeout_seconds=timeout_seconds)
        snapshot = get_md2img_health_snapshot(ensure_runtime=False)

        if not warmed_up:
            failure_body = {
                "success": False,
                "message": f"预热失败（timeout={timeout_seconds}s），请查看运行日志",
                "data": snapshot,
            }
            return jsonify(failure_body), 500

        success_body = {
            "success": True,
            "message": f"预热完成（timeout={timeout_seconds}s）",
            "data": snapshot,
        }
        return jsonify(success_body)
    except Exception as exc:
        logger.error(f"触发 md2img 预热失败: {exc}")
        return jsonify({"success": False, "message": str(exc)}), 500


@system_bp.route('/api/restart_service', methods=['POST'])
@login_required
def restart_service():
    """Launch ``restart.sh`` from the project root to restart the service.

    The script is started with ``bash`` in its own session, with output
    discarded, and is not waited for. Responds with HTTP 404 when the script
    does not exist and HTTP 500 when launching it fails.
    """
    try:
        module_dir = os.path.dirname(__file__)
        project_root = os.path.abspath(os.path.join(module_dir, '..', '..', '..'))
        script_path = os.path.join(project_root, 'restart.sh')

        if not os.path.exists(script_path):
            return jsonify({"success": False, "message": f"未找到脚本: {script_path}"}), 404

        launch_options = {
            "cwd": project_root,
            "stdout": subprocess.DEVNULL,
            "stderr": subprocess.DEVNULL,
            # Run in a new session, separate from this process's session.
            "start_new_session": True,
        }
        subprocess.Popen(['bash', script_path], **launch_options)

        logger.warning(f"后台触发服务重启脚本: {script_path}")
        response_body = {
            "success": True,
            "message": "已触发重启脚本，服务将在短时间内重启"
        }
        return jsonify(response_body)
    except Exception as exc:
        logger.error(f"触发服务重启失败: {exc}")
        return jsonify({"success": False, "message": str(exc)}), 500