# abot/admin/dashboard/blueprints/system.py

from flask import Blueprint, render_template, jsonify, request, send_from_directory, current_app, Response
from .auth import login_required
from loguru import logger
import os
import time
import subprocess
from datetime import datetime
import platform
import psutil
from collections import deque
import gzip
import json
import yaml
import toml
from utils.markdown_to_image import get_md2img_health_snapshot, warmup_md2img_browser_sync
from utils.ai.llm_registry import LLMRegistry
from base.plugin_common.plugin_interface import PluginStatus
from utils.ai.unified_llm import UnifiedLLMClient
from utils.decorator.async_job import async_job

# Create the Flask blueprint that groups the system pages and APIs in this module.
system_bp = Blueprint('system', __name__)

# Record the time this module was loaded; it is used as the application start
# time when the uptime is computed.
APP_START_TIME = time.time()


def _system_config_path() -> str:
    """Return the absolute path of ``config.yaml`` located three levels above this module's directory."""
    module_dir = os.path.dirname(__file__)
    relative_parts = ('..', '..', '..', 'config.yaml')
    return os.path.abspath(os.path.join(module_dir, *relative_parts))


def _load_system_yaml() -> dict:
    """Load the system ``config.yaml`` as a dictionary.

    Returns:
        The parsed top-level mapping. An empty dict is returned when the
        file does not exist, is empty, or its top-level node is not a
        mapping (callers rely on ``dict`` methods such as ``.get``).
    """
    config_path = _system_config_path()
    if not os.path.exists(config_path):
        return {}
    with open(config_path, 'r', encoding='utf-8') as f:
        loaded = yaml.safe_load(f)
    # A YAML document whose root is a list or scalar is not a usable config;
    # fall back to an empty mapping so the declared return type holds.
    return loaded if isinstance(loaded, dict) else {}


def _save_system_yaml(config_obj: dict) -> None:
    """Serialize ``config_obj`` as YAML and overwrite the system config file with it.

    Unicode characters are written as-is and the key order of ``config_obj``
    is preserved.
    """
    target_path = _system_config_path()
    dump_options = {"allow_unicode": True, "sort_keys": False}
    with open(target_path, 'w', encoding='utf-8') as stream:
        yaml.safe_dump(config_obj, stream, **dump_options)


def _safe_int(value, default: int = 0) -> int:
    """Safely convert a numeric value (e.g. a string returned by the database or Redis) to ``int``.

    Args:
        value: Anything ``int()`` or ``float()`` can parse: ints, floats,
            numeric strings/bytes such as ``"42"`` or ``"12.7"``.
        default: Value returned when ``value`` is ``None``, empty, not
            numeric, NaN or infinite.

    Returns:
        The integer value (fractions are truncated toward zero), or ``default``.
    """
    try:
        if value in (None, ""):
            return default
        try:
            # Direct conversion keeps full precision for large integers and
            # integral strings (a float round-trip is lossy above 2**53).
            return int(value)
        except ValueError:
            # Non-integral strings such as "12.7" or "1e3" go through float.
            return int(float(value))
    except (TypeError, ValueError, OverflowError):
        # OverflowError: infinities cannot be converted to int.
        return default


def _safe_float(value, default: float = 0.0) -> float:
    """Safely convert a value (e.g. one returned by the database or Redis) to a finite ``float``.

    Args:
        value: Anything ``float()`` can parse.
        default: Value returned when ``value`` is ``None``, empty, not
            numeric, too large for a float, NaN or infinite.

    Returns:
        The finite float value, or ``default``.
    """
    import math

    try:
        if value in (None, ""):
            return default
        number = float(value)
    except (TypeError, ValueError, OverflowError):
        # OverflowError: integers too large to be represented as a float.
        return default
    # NaN / infinity cannot be encoded as valid JSON and render as "nan" in
    # formatted summaries, so they are treated like unparsable input.
    return number if math.isfinite(number) else default


def _format_bytes_to_mb(value: int) -> float:
    """Convert a byte count to megabytes, rounded to two decimals for the home-page summary."""
    byte_count = _safe_float(value, 0.0)
    megabytes = byte_count / 1024 / 1024
    return round(megabytes, 2)


def _extract_mysql_runtime_snapshot(db_manager) -> dict:
    """Collect a lightweight summary of the MySQL runtime state.

    The home page is not meant to replace DBA tooling; it lets an
    administrator judge at a glance:
    1. whether the database is alive;
    2. whether connection pressure is high;
    3. whether the current schema has grown noticeably;
    4. whether it is worth digging into a more specialised monitoring page.

    Args:
        db_manager: Object providing ``get_mysql_database_name()``,
            ``get_slow_query_threshold_ms()`` and ``get_mysql_connection()``.

    Returns:
        A dict with ``status`` ("healthy", "warning" or "danger"), a
        human-readable ``summary`` and the collected metrics. A failure while
        acquiring the connection or running the probe queries is reported as
        ``status == "danger"`` instead of being raised.
    """
    snapshot = {
        "status": "healthy",
        "summary": "连接正常",
        "database": db_manager.get_mysql_database_name(),
        "version": "",
        "threads_connected": 0,
        "threads_running": 0,
        "max_connections": 0,
        "connection_usage_percent": 0.0,
        "questions_per_second": 0.0,
        "uptime_seconds": 0,
        "table_count": 0,
        "schema_size_mb": 0.0,
        "slow_query_threshold_ms": db_manager.get_slow_query_threshold_ms(),
    }

    mysql_conn = None
    try:
        # Acquire the connection inside the try block so that an unreachable
        # database is reported as "danger" rather than propagating an error.
        mysql_conn = db_manager.get_mysql_connection()
        with mysql_conn.cursor(dictionary=True) as cursor:
            # Liveness probe plus version detection:
            # 1. SELECT VERSION() is extremely cheap;
            # 2. unlike a bare SELECT 1 it also yields the server version;
            # 3. showing the version on the card helps spot instances that
            #    run a different version.
            cursor.execute("SELECT VERSION() AS version, DATABASE() AS database_name")
            version_row = cursor.fetchone() or {}
            snapshot["version"] = str(version_row.get("version") or "").strip()
            snapshot["database"] = str(version_row.get("database_name") or snapshot["database"] or "").strip()

            cursor.execute(
                """
                SHOW GLOBAL STATUS
                WHERE Variable_name IN ('Threads_connected', 'Threads_running', 'Questions', 'Uptime')
                """
            )
            status_rows = cursor.fetchall() or []
            status_map = {
                str(row.get("Variable_name") or "").strip(): row.get("Value")
                for row in status_rows
            }

            cursor.execute(
                """
                SHOW GLOBAL VARIABLES
                WHERE Variable_name IN ('max_connections')
                """
            )
            variable_rows = cursor.fetchall() or []
            variable_map = {
                str(row.get("Variable_name") or "").strip(): row.get("Value")
                for row in variable_rows
            }

            # The information_schema aggregation is heavier than SELECT 1 but
            # is still a lightweight metadata query:
            # 1. the home page is expected to refresh it only every ~30 seconds;
            # 2. it directly shows the table count and size of the current schema;
            # 3. it helps decide whether table growth is slowing the backend.
            cursor.execute(
                """
                SELECT
                    COUNT(*) AS table_count,
                    COALESCE(SUM(data_length + index_length), 0) AS schema_size_bytes
                FROM information_schema.tables
                WHERE table_schema = DATABASE()
                """
            )
            schema_row = cursor.fetchone() or {}

        snapshot["threads_connected"] = _safe_int(status_map.get("Threads_connected"))
        snapshot["threads_running"] = _safe_int(status_map.get("Threads_running"))
        snapshot["max_connections"] = _safe_int(variable_map.get("max_connections"))
        snapshot["uptime_seconds"] = _safe_int(status_map.get("Uptime"))
        total_questions = _safe_int(status_map.get("Questions"))
        # Average queries per second since server start (not an instantaneous rate).
        if snapshot["uptime_seconds"] > 0:
            snapshot["questions_per_second"] = round(total_questions / snapshot["uptime_seconds"], 2)
        if snapshot["max_connections"] > 0:
            snapshot["connection_usage_percent"] = round(
                (snapshot["threads_connected"] / snapshot["max_connections"]) * 100,
                1,
            )
        snapshot["table_count"] = _safe_int(schema_row.get("table_count"))
        snapshot["schema_size_mb"] = _format_bytes_to_mb(schema_row.get("schema_size_bytes"))

        # Warn when at least 80% of the connections are used or at least 12
        # threads are actively running.
        if snapshot["connection_usage_percent"] >= 80 or snapshot["threads_running"] >= 12:
            snapshot["status"] = "warning"
            snapshot["summary"] = (
                f"连接压力偏高：已连接 {snapshot['threads_connected']} / {snapshot['max_connections']}，"
                f"运行中线程 {snapshot['threads_running']}"
            )
        else:
            snapshot["summary"] = (
                f"连接正常：已连接 {snapshot['threads_connected']} / {snapshot['max_connections'] or '-'}，"
                f"QPS {snapshot['questions_per_second']}"
            )
        return snapshot
    except Exception as mysql_error:
        snapshot["status"] = "danger"
        snapshot["summary"] = f"MySQL 探测失败: {mysql_error}"
        return snapshot
    finally:
        if mysql_conn is not None:
            try:
                mysql_conn.close()
            except Exception as close_error:
                # A failing close must not replace the snapshot computed above.
                logger.warning(f"关闭 MySQL 连接失败: {close_error}")


def _extract_redis_runtime_snapshot(db_manager) -> dict:
    """Collect a summary of the Redis runtime state.

    Args:
        db_manager: Object exposing ``get_redis_connection()`` and, optionally,
            a ``redis_config`` mapping whose ``db`` entry is the database index.

    Returns:
        A dict with ``status`` ("healthy", "warning" or "danger"), a
        human-readable ``summary`` and the collected metrics. Any error raised
        while probing is reported as ``status == "danger"``.
    """
    cfg = getattr(db_manager, "redis_config", {}) or {}
    snapshot = dict(
        status="healthy",
        summary="连接正常",
        db_index=_safe_int(cfg.get("db", 0)),
        key_count=0,
        connected_clients=0,
        blocked_clients=0,
        ops_per_sec=0,
        used_memory_human="",
        used_memory_peak_human="",
        memory_usage_percent=0.0,
        uptime_seconds=0,
        hit_rate_percent=0.0,
    )

    try:
        client = db_manager.get_redis_connection()
        client.ping()
        stats = client.info() or {}
        snapshot["key_count"] = _safe_int(client.dbsize())

        # Integer counters copied from INFO: snapshot field -> INFO field.
        for field, info_key in (
            ("connected_clients", "connected_clients"),
            ("blocked_clients", "blocked_clients"),
            ("ops_per_sec", "instantaneous_ops_per_sec"),
        ):
            snapshot[field] = _safe_int(stats.get(info_key))
        # Human-readable memory figures are passed through as trimmed text.
        for field in ("used_memory_human", "used_memory_peak_human"):
            snapshot[field] = str(stats.get(field) or "").strip()
        snapshot["uptime_seconds"] = _safe_int(stats.get("uptime_in_seconds"))

        # Memory usage is only computed when a positive maxmemory is reported.
        memory_limit = _safe_int(stats.get("maxmemory"))
        memory_used = _safe_int(stats.get("used_memory"))
        if memory_limit > 0:
            snapshot["memory_usage_percent"] = round((memory_used / memory_limit) * 100, 1)

        hits = _safe_int(stats.get("keyspace_hits"))
        misses = _safe_int(stats.get("keyspace_misses"))
        lookups = hits + misses
        if lookups > 0:
            snapshot["hit_rate_percent"] = round((hits / lookups) * 100, 1)

        under_pressure = snapshot["blocked_clients"] > 0 or snapshot["memory_usage_percent"] >= 80
        detail = (
            f"keys {snapshot['key_count']}，"
            f"clients {snapshot['connected_clients']}，ops/s {snapshot['ops_per_sec']}"
        )
        if under_pressure:
            snapshot["status"] = "warning"
            snapshot["summary"] = f"缓存压力需关注：{detail}"
        else:
            snapshot["summary"] = f"缓存正常：{detail}"
    except Exception as redis_error:
        snapshot["status"] = "danger"
        snapshot["summary"] = f"Redis 探测失败: {redis_error}"
    return snapshot


def _parse_snapshot_datetime(value: str | None) -> datetime | None:
    """把首页摘要里常用的时间字符串安全转换为 datetime。"""
    text = str(value or "").strip()
    if not text:
        return None
    try:
        return datetime.strptime(text, "%Y-%m-%d %H:%M:%S")
    except ValueError:
        return None


def _count_enabled_runtime_items(items) -> int:
    """Count the enabled entries of a catalog section.

    Compatibility notes:
    1. in the newer catalog model providers/backends/scenes may be a dict
       (its values are inspected);
    2. some fallback paths may hand over a list instead;
    3. entries without an ``enabled`` field count as enabled.

    Entries that are not dicts are ignored; any other container type yields 0.
    """
    if isinstance(items, dict):
        candidates = items.values()
    elif isinstance(items, list):
        candidates = items
    else:
        return 0
    return sum(
        1
        for entry in candidates
        if isinstance(entry, dict) and entry.get("enabled", True)
    )


def _extract_llm_catalog_summary() -> dict:
    """Summarise the LLM routing configuration for the home page.

    No real call is made to probe the models; the summary only answers:
    1. whether usable scenes and targets exist at runtime;
    2. which routing set the call records seen by the administrator
       roughly belong to.

    Returns:
        A dict with ``provider_count``, ``scene_count``, ``target_count``,
        ``default_scene``, ``default_backend`` and ``has_routing``. On any
        error a warning is logged and an all-empty summary is returned.
    """
    try:
        catalog = LLMRegistry.get_catalog() or {}
        if not catalog:
            # Without a catalog model fall back to the legacy view so the
            # home page still knows whether basic routing is configured.
            legacy = LLMRegistry.get_llm_config() or {}
            legacy_scenes = legacy.get("scenes", {}) or {}
            legacy_backends = legacy.get("backends", {}) or {}
            legacy_default = str(legacy.get("default_backend") or "").strip()
            return {
                "provider_count": 0,
                "scene_count": len(legacy_scenes) if isinstance(legacy_scenes, dict) else 0,
                "target_count": len(legacy_backends) if isinstance(legacy_backends, dict) else 0,
                "default_scene": "",
                "default_backend": legacy_default,
                "has_routing": bool(legacy_scenes) or bool(legacy_default),
            }

        providers = catalog.get("providers", {}) or {}
        dify_apps = catalog.get("dify_apps", {}) or {}
        backends = catalog.get("backends", {}) or {}
        scenes = catalog.get("scenes", {}) or {}
        default_scene = str(catalog.get("default_scene") or "").strip()
        if default_scene:
            default_backend = str(LLMRegistry.get_scene_backend_name(default_scene) or "").strip()
        else:
            default_backend = ""

        enabled_scenes = _count_enabled_runtime_items(scenes)
        enabled_targets = _count_enabled_runtime_items(backends) + _count_enabled_runtime_items(dify_apps)
        return {
            "provider_count": _count_enabled_runtime_items(providers),
            "scene_count": enabled_scenes,
            "target_count": enabled_targets,
            "default_scene": default_scene,
            "default_backend": default_backend,
            "has_routing": enabled_scenes > 0,
        }
    except Exception as llm_catalog_error:
        logger.warning(f"提取 LLM 路由摘要失败: {llm_catalog_error}")
        return {
            "provider_count": 0,
            "scene_count": 0,
            "target_count": 0,
            "default_scene": "",
            "default_backend": "",
            "has_routing": False,
        }


def _extract_ai_runtime_snapshot() -> dict:
    """Build the home-page summary of the LLM runtime state.

    Design principles:
    1. only passive observations from the recent call window are shown; no
       request is sent to probe liveness;
    2. recent calls are combined with the static routing configuration so the
       administrator sees which route was used, not just success/failure;
    3. when there were no recent calls, "not configured" is distinguished
       from "configured but currently idle".

    Returns:
        The runtime snapshot of ``UnifiedLLMClient`` extended with routing
        counters, the fields of the last call, and a ``status``
        ("healthy", "warning" or "danger") with a matching ``summary``.
    """
    runtime = UnifiedLLMClient.get_runtime_snapshot() or {}
    recent_call = dict(runtime.get("last_call") or {})
    routing = _extract_llm_catalog_summary()

    total = _safe_int(runtime.get("total_calls"))
    failed = _safe_int(runtime.get("failed_calls"))
    rate = _safe_float(runtime.get("success_rate"))
    latency = _safe_float(runtime.get("avg_latency_ms"))
    error_text = str(runtime.get("last_error") or "").strip()

    def _last(field: str) -> str:
        """Return a trimmed text field of the most recent call."""
        return str(recent_call.get(field) or "").strip()

    snapshot = {
        **runtime,
        "last_call": recent_call,
        "provider_count": routing.get("provider_count", 0),
        "scene_count": routing.get("scene_count", 0),
        "target_count": routing.get("target_count", 0),
        "default_scene": routing.get("default_scene", ""),
        "default_backend": routing.get("default_backend", ""),
        "has_routing": bool(routing.get("has_routing")),
        "last_provider": _last("provider"),
        "last_backend": _last("backend"),
        "last_scene": _last("scene"),
        "last_model": _last("model"),
        "last_timestamp": _last("timestamp"),
        "last_latency_ms": _safe_float(recent_call.get("latency_ms")),
    }

    # Metrics tail shared by every summary that reports on actual calls.
    metrics = f"成功率 {rate:.2f}%，平均耗时 {latency:.2f}ms"

    if not snapshot["has_routing"]:
        status = "warning"
        summary = "当前未发现完整的 LLM 路由配置，建议先检查默认场景与后端绑定"
    elif total <= 0:
        status = "warning"
        summary = (
            f"已配置 {snapshot['scene_count']} 个场景、{snapshot['target_count']} 个目标，"
            "最近窗口内暂无统一 LLM 调用记录"
        )
    elif failed >= total:
        # Every call in the window failed (total is positive at this point).
        status = "danger"
        summary = f"最近 {total} 次调用全部失败，{metrics}"
    elif failed > 0 or error_text:
        status = "warning"
        summary = f"最近 {total} 次调用中失败 {failed} 次，{metrics}"
    else:
        status = "healthy"
        summary = f"最近 {total} 次调用全部成功，{metrics}"

    snapshot["status"] = status
    snapshot["summary"] = summary
    return snapshot


def _build_llm_runtime_analytics_payload() -> dict:
    """Build the recent-window analytics payload for the LLM page.

    Why this is a separate function:
    1. the home-page AI card only needs a summary, while the ``system_llm``
       page needs finer-grained grouped tables;
    2. both rely on the same runtime snapshot, so the
       scene/backend/provider/model aggregation is not scattered over
       several endpoints;
    3. the first stage covers "recent window" analysis so administrators can
       quickly spot slow scenes, failing models and misbehaving backends.

    Returns:
        A dict with an ``overview`` section, the raw ``recent_rows`` and the
        ``by_scene`` / ``by_backend`` / ``by_provider`` / ``by_model`` groups.
    """
    breakdown = UnifiedLLMClient.get_runtime_breakdown() or {}
    health = _extract_ai_runtime_snapshot()
    routing = _extract_llm_catalog_summary()

    overview = {
        key: _safe_int(breakdown.get(key))
        for key in ("window_size", "total_calls", "success_calls", "failed_calls")
    }
    overview.update(
        {key: _safe_float(breakdown.get(key)) for key in ("success_rate", "avg_latency_ms")}
    )
    overview["last_error"] = str(breakdown.get("last_error") or "").strip()
    overview["status"] = str(health.get("status") or "warning").strip()
    overview["summary"] = str(health.get("summary") or "").strip()
    overview["last_call"] = dict(breakdown.get("last_call") or {})
    for key in ("provider_count", "scene_count", "target_count"):
        overview[key] = _safe_int(routing.get(key))
    for key in ("default_scene", "default_backend"):
        overview[key] = str(routing.get(key) or "").strip()
    overview["has_routing"] = bool(routing.get("has_routing"))

    payload = {
        "overview": overview,
        # Keep the raw recent-window rows so a future "last N calls" list
        # can reuse them directly.
        "recent_rows": breakdown.get("rows", []) or [],
    }
    for group in ("by_scene", "by_backend", "by_provider", "by_model"):
        payload[group] = breakdown.get(group, []) or []
    return payload


def _extract_scheduler_runtime_snapshot() -> dict:
    """Aggregate the ``async_job`` runtime state into the home-page scheduler summary.

    This does not replace the full job page; it answers the questions
    administrators ask most often:
    1. whether jobs were loaded properly;
    2. whether there are failed jobs or invalid schedules;
    3. roughly when the next job will run;
    4. whether mostly system jobs or plugin jobs are present.

    Returns:
        A dict with ``status`` ("healthy", "warning" or "danger"), a
        ``summary`` text, job counters, the next run time and details of the
        most recently failed job.
    """
    jobs = async_job.get_jobs_snapshot()

    upcoming_runs = []
    failures = []
    system_job_count = 0
    plugin_job_count = 0
    invalid_jobs = 0
    never_run_jobs = 0
    enabled_jobs = 0
    running_jobs = 0

    for job in jobs:
        key = str(job.get("job_key") or "").strip()
        owner = str(job.get("owner_name") or "system").strip().lower()
        scheduled_at = _parse_snapshot_datetime(job.get("next_run_at"))
        outcome = str(job.get("last_status") or "").strip().lower()
        is_enabled = bool(job.get("enabled"))

        # A job counts as a system job only when it is owned by "system" and
        # its key does not carry the plugin-schedule prefix.
        if owner == "system" and not key.startswith("plugin_schedule:"):
            system_job_count += 1
        else:
            plugin_job_count += 1

        if is_enabled:
            enabled_jobs += 1
            if scheduled_at:
                upcoming_runs.append(scheduled_at)
        if bool(job.get("running")):
            running_jobs += 1

        if outcome in {"failed", "invalid_schedule"}:
            failures.append(job)
        if outcome == "invalid_schedule":
            invalid_jobs += 1
        elif outcome == "never":
            never_run_jobs += 1

    def _failure_time(job):
        """Timestamp used to find the newest failure: updated_at, then last_run_at."""
        return (
            _parse_snapshot_datetime(job.get("updated_at"))
            or _parse_snapshot_datetime(job.get("last_run_at"))
            or datetime.min
        )

    # max() returns the first of several equally recent failures.
    newest_failure = max(failures, key=_failure_time) if failures else {}

    total_jobs = len(jobs)
    failed_jobs = len(failures)
    next_run_at_text = min(upcoming_runs).strftime("%Y-%m-%d %H:%M:%S") if upcoming_runs else ""
    next_run_label = next_run_at_text or '暂未计算'

    # Keep the error text short enough for the card: at most 120 characters.
    newest_error = str(newest_failure.get("last_error") or "").strip()
    if len(newest_error) > 120:
        newest_error = f"{newest_error[:117]}..."

    snapshot = {
        "status": "healthy",
        "summary": "任务调度运行正常",
        "total_jobs": total_jobs,
        "enabled_jobs": enabled_jobs,
        "running_jobs": running_jobs,
        "failed_jobs": failed_jobs,
        "invalid_jobs": invalid_jobs,
        "paused_jobs": total_jobs - enabled_jobs,
        "never_run_jobs": never_run_jobs,
        "system_job_count": system_job_count,
        "plugin_job_count": plugin_job_count,
        "next_run_at": next_run_at_text,
        "latest_failed_job_name": str(newest_failure.get("name") or "").strip(),
        "latest_failed_error": newest_error,
    }

    # The checks are ordered by severity; the first match wins.
    if total_jobs <= 0:
        status, summary = "warning", "当前没有加载任何定时任务"
    elif invalid_jobs > 0:
        status = "danger"
        summary = f"发现 {invalid_jobs} 个任务调度配置非法，建议立即检查任务页"
    elif failed_jobs > 0:
        status = "warning"
        summary = f"最近有 {failed_jobs} 个任务执行失败，下一次执行 {next_run_label}"
    elif enabled_jobs <= 0:
        status, summary = "warning", "任务已加载，但当前没有启用中的调度任务"
    elif running_jobs > 0:
        status = "healthy"
        summary = f"当前有 {running_jobs} 个任务执行中，下一次执行 {next_run_label}"
    else:
        status = "healthy"
        summary = f"已启用 {enabled_jobs} 个任务，下一次执行 {next_run_label}"

    snapshot["status"] = status
    snapshot["summary"] = summary
    return snapshot


def _legacy_llm_to_catalog(legacy_llm: dict) -> dict:
    """Convert the legacy ``llm`` (backends/scenes) structure into the catalog structure.

    Notes:
    1. the conversion writes nothing; it is a display fallback for when the
       catalog tables are unavailable;
    2. dify backends are split into one shared provider template plus one
       ``dify_app`` per backend; every other backend is kept as a backend.

    Malformed input is tolerated: a non-dict ``legacy_llm`` or ``backends``
    is treated as empty, and a backend whose config is not a dict is treated
    as having no provider.

    Args:
        legacy_llm: Legacy config with optional ``backends`` (name -> config),
            ``scenes`` (scene name -> backend name) and ``default_backend``.

    Returns:
        A dict with ``default_scene`` and the lists ``providers``,
        ``dify_apps``, ``backends`` and ``scenes``.
    """
    llm = legacy_llm if isinstance(legacy_llm, dict) else {}
    old_backends = llm.get("backends")
    if not isinstance(old_backends, dict):
        old_backends = {}
    old_scenes = llm.get("scenes", {}) or {}
    default_backend = str(llm.get("default_backend") or "").strip()

    def _provider_of(backend_cfg) -> str:
        """Return the normalised provider name of a backend config ("" if unusable)."""
        if not isinstance(backend_cfg, dict):
            return ""
        return str(backend_cfg.get("provider") or "").strip().lower()

    def _scene_entry(scene_name: str, backend_name: str) -> dict:
        """Build a scene row that points at ``backend_name``."""
        is_dify = _provider_of(old_backends.get(backend_name)) == "dify"
        return {
            "name": scene_name,
            "target_type": "dify_app" if is_dify else "backend",
            "target_ref": backend_name,
            "enabled": True,
        }

    providers = []
    dify_apps = []
    backends = []
    scenes = []

    # The first dify backend supplies the values of the shared provider template.
    dify_template_cfg = next(
        (dict(cfg) for cfg in old_backends.values() if _provider_of(cfg) == "dify"),
        None,
    )
    if dify_template_cfg:
        providers.append(
            {
                "name": "dify_workflow_default",
                "provider_type": "dify",
                "enabled": True,
                "config": {
                    "provider": "dify",
                    "api_base_url": dify_template_cfg.get("api_base_url", ""),
                    "endpoint": dify_template_cfg.get("endpoint", "workflows/run"),
                    "mode": dify_template_cfg.get("mode", "workflow"),
                    "response_mode": dify_template_cfg.get("response_mode", "blocking"),
                    "request_timeout": dify_template_cfg.get("request_timeout", 60),
                    "max_retries": dify_template_cfg.get("max_retries", 3),
                    "retry_delay_seconds": dify_template_cfg.get("retry_delay_seconds", 1.0),
                },
            }
        )

    for backend_name, backend_cfg in old_backends.items():
        if not isinstance(backend_cfg, dict):
            continue
        if _provider_of(backend_cfg) == "dify":
            dify_apps.append(
                {
                    "name": str(backend_name),
                    "provider_template": "dify_workflow_default",
                    "app_key": str(backend_cfg.get("api_key") or "").strip(),
                    "workflow_output_key": str(backend_cfg.get("workflow_output_key") or "text").strip(),
                    "enabled": True,
                    "config": {
                        "endpoint": backend_cfg.get("endpoint", ""),
                        "mode": backend_cfg.get("mode", ""),
                        "response_mode": backend_cfg.get("response_mode", ""),
                        "request_timeout": backend_cfg.get("request_timeout", ""),
                    },
                }
            )
        else:
            backends.append(
                {
                    "name": str(backend_name),
                    "enabled": True,
                    "config": dict(backend_cfg),
                }
            )

    if isinstance(old_scenes, dict) and old_scenes:
        for scene_name, backend_name in old_scenes.items():
            scene_name = str(scene_name or "").strip()
            backend_name = str(backend_name or "").strip()
            if not scene_name or not backend_name:
                continue
            scenes.append(_scene_entry(scene_name, backend_name))
    elif default_backend:
        # No scene mapping: expose the default backend as a single scene.
        scenes.append(_scene_entry("main.default", default_backend))

    default_scene = scenes[0]["name"] if scenes else ""
    return {
        "default_scene": default_scene,
        "providers": providers,
        "dify_apps": dify_apps,
        "backends": backends,
        "scenes": scenes,
    }


def _load_llm_catalog_runtime() -> dict:
    """Read the runtime LLM catalog, preferring the MySQL-backed model.

    The catalog stored in ``llm_catalog_db`` is returned when it exists and
    contains scenes. Otherwise (or when reading it fails, which is logged as
    a warning) the legacy ``llm`` section of the YAML config is converted
    into the catalog structure for display.
    """
    try:
        catalog_db = getattr(current_app.dashboard_server, "llm_catalog_db", None)
        if catalog_db:
            stored = catalog_db.get_catalog() or {}
            if stored and stored.get("scenes"):
                return stored
    except Exception as e:
        logger.warning(f"从 MySQL 读取 LLM 目录失败，回退 YAML: {e}")

    # Fallback: present the legacy YAML ``llm`` section in catalog form.
    legacy_section = _load_system_yaml().get("llm", {}) or {}
    if isinstance(legacy_section, dict):
        return _legacy_llm_to_catalog(legacy_section)
    return _legacy_llm_to_catalog({})


def _save_llm_catalog_runtime(catalog: dict) -> None:
    """Persist the runtime LLM catalog through ``llm_catalog_db``.

    Raises:
        RuntimeError: If ``llm_catalog_db`` is not available on the dashboard
            server, or if ``save_catalog`` reports failure.
    """
    catalog_db = getattr(current_app.dashboard_server, "llm_catalog_db", None)
    if not catalog_db:
        raise RuntimeError("llm_catalog_db 未初始化，无法保存 LLM 目录到 MySQL")
    if not catalog_db.save_catalog(catalog or {}):
        raise RuntimeError("保存 LLM 目录到 MySQL 失败")


def _plugins_root_path() -> str:
    """Return the absolute path of the ``plugins`` directory three levels above this module's directory."""
    module_dir = os.path.dirname(__file__)
    relative_parts = ('..', '..', '..', 'plugins')
    return os.path.abspath(os.path.join(module_dir, *relative_parts))


def _scan_plugin_llm_usage() -> list:
    """Scan every plugin's ``config.toml`` and collect its LLM scene references.

    Notes:
    1. the scan only reads the files (for backend visualisation); plugin
       configs are never rewritten;
    2. only ``scene`` keys are collected: on a top-level section, on a table
       nested one level inside a section, or on the document root;
    3. the result feeds the "plugin -> scene -> backend" dependency view.

    Returns:
        De-duplicated rows ``{"plugin", "section", "scene"}`` sorted by plugin
        and section. An empty list when the plugins directory does not exist.
    """
    root = _plugins_root_path()
    if not os.path.isdir(root):
        return []

    found = []

    def _record(plugin_name: str, section_name: str, node: dict) -> None:
        """Append a usage row when ``node`` is a table with a non-empty ``scene``."""
        if isinstance(node, dict):
            scene = str(node.get("scene") or "").strip()
            if scene:
                found.append({
                    "plugin": plugin_name,
                    "section": section_name,
                    "scene": scene,
                })

    for entry in sorted(os.listdir(root)):
        entry_dir = os.path.join(root, entry)
        if not os.path.isdir(entry_dir):
            continue
        config_file = os.path.join(entry_dir, "config.toml")
        if not os.path.exists(config_file):
            continue
        try:
            parsed = toml.load(config_file) or {}
        except Exception as e:
            logger.warning(f"扫描插件 LLM 依赖失败: plugin={entry}, path={config_file}, error={e}")
            continue

        # Sections first: covers [Dify] / [api] / [Douyu.report_api] styles.
        for section_name, section_value in parsed.items():
            if not isinstance(section_value, dict):
                continue
            _record(entry, str(section_name), section_value)
            # Second level: nested tables such as llm / api / report_api.
            for nested_name, nested_value in section_value.items():
                if isinstance(nested_value, dict):
                    _record(entry, f"{section_name}.{nested_name}", nested_value)
        # Root fallback: the rare case of ``scene`` written at the top level.
        _record(entry, "__root__", parsed if isinstance(parsed, dict) else {})

    # De-duplicate on plugin + section + scene so overlapping passes yield
    # a single row.
    deduped = {}
    for usage in found:
        deduped[f"{usage.get('plugin')}::{usage.get('section')}::{usage.get('scene')}"] = usage

    ordered = list(deduped.values())
    ordered.sort(key=lambda usage: (usage.get("plugin", ""), usage.get("section", "")))
    return ordered


def _build_llm_topology() -> dict:
    """Build the LLM topology view that shows plugin -> scene -> target dependencies.

    Returns:
        A dict with the catalog sections (``providers``, ``dify_apps``,
        ``backends``, ``scenes``), the ``default_scene``, the raw
        ``plugin_usages`` and one resolved ``topology_rows`` entry per usage.
    """
    catalog = _load_llm_catalog_runtime()

    def _section(key: str) -> list:
        """Return a catalog section as a list (empty when missing)."""
        return catalog.get(key, []) or []

    def _index_by_name(key: str) -> dict:
        """Index the entries of a catalog section by their trimmed name."""
        return {str(entry.get("name") or "").strip(): entry for entry in _section(key)}

    providers = _index_by_name("providers")
    dify_apps = _index_by_name("dify_apps")
    backends = _index_by_name("backends")
    scenes = _index_by_name("scenes")
    default_scene = str(catalog.get("default_scene") or "").strip()

    plugin_usages = _scan_plugin_llm_usage()
    topology_rows = []
    for usage in plugin_usages:
        scene_name = str(usage.get("scene") or "").strip()
        scene = scenes.get(scene_name, {}) or {}
        target_type = str(scene.get("target_type") or "").strip().lower()
        target_ref = str(scene.get("target_ref") or "").strip()

        provider_label = ""
        target_ok = False
        if target_type == "dify_app":
            # A dify app is valid only if both the app and its provider
            # template exist in the catalog.
            app = dify_apps.get(target_ref, {}) or {}
            template = providers.get(str(app.get("provider_template") or "").strip(), {}) or {}
            provider_label = str(template.get("provider_type") or "").strip()
            target_ok = bool(app and template)
        elif target_type == "backend":
            backend = backends.get(target_ref, {}) or {}
            backend_cfg = (backend.get("config") or {}) if isinstance(backend, dict) else {}
            provider_label = str((backend_cfg or {}).get("provider") or "").strip()
            target_ok = bool(backend)

        topology_rows.append({
            "plugin": usage.get("plugin", ""),
            "section": usage.get("section", ""),
            "scene": scene_name,
            "target_type": target_type or "-",
            "target_ref": target_ref or "-",
            "provider": provider_label or "-",
            "valid_scene": bool(scene_name in scenes),
            "valid_target": target_ok,
        })

    return {
        "default_scene": default_scene,
        "providers": _section("providers"),
        "dify_apps": _section("dify_apps"),
        "backends": _section("backends"),
        "scenes": _section("scenes"),
        "plugin_usages": plugin_usages,
        "topology_rows": topology_rows,
    }


@system_bp.route('/api_docs')
@login_required
def api_docs():
    """Render the API docs page for the URL given by ``src`` or the configured server.

    Without a ``src`` query parameter the URL comes from ``server_url`` in the
    robot's ``ipad_config``; the local default is used when that lookup fails.
    """
    # NOTE(review): ``src`` comes straight from the query string and is handed
    # to the template unvalidated — confirm how the template embeds it.
    fallback_url = "http://127.0.0.1:8059/"
    src = request.args.get('src')
    if not src:
        try:
            robot = current_app.dashboard_server.robot
            ipad_cfg = getattr(robot, "ipad_config", {}) or {}
            src = ipad_cfg.get("server_url", fallback_url)
        except Exception:
            src = fallback_url
    return render_template('api_docs.html', src_url=src)


@system_bp.route('/system_status')
@login_required
def system_status():
    """Render the system status page for the URL given by ``src`` or the glances config.

    Without a ``src`` query parameter the URL is built from the ``host`` and
    ``port`` of the robot config's ``glances`` section; the local default is
    used when that lookup fails.
    """
    # NOTE(review): ``src`` comes straight from the query string and is handed
    # to the template unvalidated — confirm how the template embeds it.
    src = request.args.get('src')
    if not src:
        try:
            robot = current_app.dashboard_server.robot
            if hasattr(robot, "config"):
                glances_cfg = robot.config.glances
            else:
                glances_cfg = {}
            host = glances_cfg.get("host", "127.0.0.1")
            port = glances_cfg.get("port", 61208)
            src = f"http://{host}:{port}/"
        except Exception:
            src = "http://127.0.0.1:61208/"
    return render_template('system_status.html', src_url=src)


@system_bp.route('/system_llm')
@login_required
def system_llm():
    """Render the LLM management page."""
    template_name = 'system_llm.html'
    return render_template(template_name)


# 页面路由
@system_bp.route('/wx_logs')
@login_required
def wx_logs():
    """Render the ``wx_logs.html`` log page."""
    template_name = 'wx_logs.html'
    return render_template(template_name)


# API路由
@system_bp.route('/api/system_info')
@login_required
def api_system_info():
    """Return a JSON snapshot of OS, Python and resource-usage figures.

    On success the body is ``{"success": True, "data": {...}}``. Any
    exception is logged and reported as ``{"success": False, "error": ...}``
    with HTTP 500.
    """
    try:
        os_name = platform.system()
        os_version = platform.version()
        python_version = platform.python_version()
        cpu_usage = psutil.cpu_percent()
        memory_usage = psutil.virtual_memory().percent
        disk_usage = psutil.disk_usage('/').percent
        # Uptime is measured from the moment this module was imported
        # (APP_START_TIME), not from the host's boot time.
        uptime = time.time() - APP_START_TIME
        timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        open_file_count = len(psutil.Process(os.getpid()).open_files())

        system_info = {
            "os": os_name,
            "os_version": os_version,
            "python_version": python_version,
            "cpu_usage": cpu_usage,
            "memory_usage": memory_usage,
            "disk_usage": disk_usage,
            "uptime": uptime,
            "timestamp": timestamp,
            "open_files": open_file_count,
        }
        return jsonify({"success": True, "data": system_info})
    except Exception as e:
        logger.error(f"获取系统信息失败: {e}")
        return jsonify({"success": False, "error": str(e)}), 500


@system_bp.route('/api/system_health_summary')
@login_required
def api_system_health_summary():
    """Aggregate the key health signals shown on the dashboard home page.

    The JSON payload contains one section each for the robot, plugins, recent
    errors, infrastructure (MySQL / Redis), AI runtime and scheduler. Most
    sections carry a ``status`` plus a ready-made ``summary`` string so the
    front end does not have to re-implement the rules. Any exception is logged
    and reported as ``{"success": False, "error": ...}`` with HTTP 500.
    """
    try:
        server = current_app.dashboard_server
        robot = getattr(server, "robot", None)
        plugin_manager = getattr(server, "plugin_manager", None)
        plugins_by_name = getattr(plugin_manager, "plugins", {}) or {}

        # Count plugins per runtime status so the home page can tell at a
        # glance how many are loaded, actually running, or in error.
        status_buckets = (
            (PluginStatus.RUNNING, "running"),
            (PluginStatus.LOADED, "loaded"),
            (PluginStatus.STOPPED, "stopped"),
            (PluginStatus.ERROR, "error"),
            (PluginStatus.UNLOADED, "unloaded"),
        )
        counts = {
            "total": len(plugins_by_name),
            **dict.fromkeys(
                ("running", "loaded", "stopped", "error", "unloaded", "unknown"), 0
            ),
        }
        for plugin in plugins_by_name.values():
            status = getattr(plugin, "status", None)
            bucket = next(
                (key for member, key in status_buckets if status == member),
                "unknown",
            )
            counts[bucket] += 1

        # Reuse the existing stats DB for the error count instead of writing
        # dedicated SQL just for the home-page card.
        _, recent_error_count = server.stats_db.get_error_logs(days=1, page=1, limit=1)

        # Infrastructure health: a summary-level look at MySQL and Redis, not
        # a deep inspection. The snapshots carry a status plus whatever
        # metrics the helpers provide.
        mysql_snapshot = _extract_mysql_runtime_snapshot(server.db_manager)
        redis_snapshot = _extract_redis_runtime_snapshot(server.db_manager)

        # Robot section: connection flag plus a human-readable summary that
        # is suffixed with the nickname (or wxid) when one is known.
        robot_running = bool(getattr(robot, "ipad_running", False))
        robot_nickname = str(getattr(robot, "nickname", "") or "").strip()
        robot_wxid = str(getattr(robot, "wxid", "") or "").strip()
        if robot_running:
            robot_summary = "已连接并正在处理消息"
        else:
            robot_summary = "未连接或主循环未运行"
        display_name = robot_nickname or robot_wxid
        if display_name:
            robot_summary = f"{robot_summary} · {display_name}"

        # Plugin section: errors take priority, then "nothing running".
        total_plugins = counts["total"]
        running_plugins = counts["running"]
        failed_plugins = counts["error"]
        if failed_plugins > 0:
            plugin_status = "warning"
            plugin_summary = f"异常 {failed_plugins} 个，运行中 {running_plugins} / {total_plugins}"
        elif running_plugins == 0 and total_plugins > 0:
            plugin_status = "warning"
            plugin_summary = f"暂无运行中插件，共加载 {total_plugins} 个"
        else:
            plugin_status = "healthy"
            plugin_summary = f"运行中 {running_plugins} / {total_plugins}"

        has_recent_errors = recent_error_count > 0
        error_status = "warning" if has_recent_errors else "healthy"
        error_summary = (
            f"近 24 小时记录到 {recent_error_count} 条异常"
            if has_recent_errors
            else "近 24 小时未记录到异常"
        )

        # AI card: runtime state plus routing summary; passive observation
        # only, no active probing.
        ai_runtime = _extract_ai_runtime_snapshot()

        # Scheduler summary shown on the right-hand side of the home page.
        scheduler_runtime = _extract_scheduler_runtime_snapshot()

        # Overall infrastructure status is the worst of the two snapshots.
        mysql_state = mysql_snapshot.get("status")
        redis_state = redis_snapshot.get("status")
        infra_states = {mysql_state, redis_state}
        if "danger" in infra_states:
            infra_status = "danger"
        elif "warning" in infra_states:
            infra_status = "warning"
        else:
            infra_status = "healthy"

        if mysql_state == "healthy" and redis_state == "healthy":
            infra_summary = "MySQL / Redis 均正常"
        elif mysql_state != "danger" and redis_state != "danger":
            infra_summary = "基础设施连接正常，但部分负载指标需要关注"
        else:
            infra_summary = "存在基础设施连接异常"

        summary = {
            "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
            "robot": {
                "status": "healthy" if robot_running else "danger",
                "running": robot_running,
                "nickname": robot_nickname,
                "wxid": robot_wxid,
                "summary": robot_summary,
            },
            "plugins": {
                "status": plugin_status,
                "summary": plugin_summary,
                **counts,
            },
            "errors": {
                "status": error_status,
                "recent_24h_count": recent_error_count,
                "summary": error_summary,
            },
            "infrastructure": {
                "status": infra_status,
                "summary": infra_summary,
                "mysql": mysql_snapshot,
                "redis": redis_snapshot,
            },
            "ai_runtime": {**ai_runtime},
            "scheduler": {**scheduler_runtime},
        }
        return jsonify({"success": True, "data": summary})
    except Exception as e:
        logger.error(f"获取系统健康摘要失败: {e}")
        return jsonify({"success": False, "error": str(e)}), 500


@system_bp.route('/api/wx_logs')
@login_required
def api_wx_logs():
    """Return the trailing lines of one of the WeChat log files as JSON.

    Query parameters:
        type: ``error`` selects ``wx_error.log`` and ``debug`` selects
            ``wx_debug.log``; any other value selects ``wx_info.log``.
        lines: number of trailing lines to return (default 100). Zero or a
            negative number yields an empty list.

    The body is ``{"success": True, "data": {...}}`` where ``data`` holds the
    log type, the resolved file path, the lines (without line terminators)
    and their count. It is gzip-compressed when the client lists ``gzip`` in
    ``Accept-Encoding``. A missing log file is not an error: the content is
    simply empty. Unexpected failures are logged and reported with HTTP 500.
    """
    try:
        log_type = request.args.get('type', 'info')
        lines = request.args.get('lines', 100, type=int)

        # The log files live in the "logs" directory three levels above this
        # module's directory.
        logs_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..', '..', 'logs'))
        log_names = {'error': 'wx_error.log', 'debug': 'wx_debug.log'}
        log_file = os.path.join(logs_dir, log_names.get(log_type, 'wx_info.log'))

        log_content = []
        if not os.path.exists(log_file):
            logger.warning(f"日志文件不存在: {log_file}")
            # List the available .log files to help diagnose a wrong path.
            try:
                all_files = [f for f in os.listdir(logs_dir) if f.endswith('.log')]
                logger.info(f"项目根目录下的日志文件: {all_files}")
            except Exception as e:
                logger.error(f"列出目录文件失败: {e}")
        elif lines > 0:
            # Only read when at least one line is requested: slicing with
            # [-0:] would return everything, and deque() rejects a negative
            # maxlen.
            try:
                chunk_size = 8192
                chunks = []
                newline_count = 0
                with open(log_file, 'rb') as f:
                    f.seek(0, os.SEEK_END)
                    pos = f.tell()
                    # Walk backwards chunk by chunk until more than `lines`
                    # newlines were seen, so the (possibly partial) first
                    # line of the buffer is never part of the result.
                    while pos > 0 and newline_count <= lines:
                        read_size = min(chunk_size, pos)
                        pos -= read_size
                        f.seek(pos)
                        chunk = f.read(read_size)
                        chunks.append(chunk)
                        # Count per chunk instead of re-scanning the whole
                        # buffer on every iteration.
                        newline_count += chunk.count(b'\n')
                tail = b"".join(reversed(chunks))
                log_content = [
                    raw.decode('utf-8', errors='ignore')
                    for raw in tail.splitlines()[-lines:]
                ]
            except Exception as e:
                logger.error(f"高效读取日志失败，回退到常规方式: {e}")
                with open(log_file, 'r', encoding='utf-8', errors='ignore') as f:
                    # Strip line terminators so both read paths return lines
                    # in the same shape.
                    log_content = [line.rstrip('\r\n') for line in deque(f, lines)]

        payload = {
            "success": True,
            "data": {
                "log_type": log_type,
                "log_file": log_file,
                "content": log_content,
                "lines": len(log_content)
            }
        }
        accept = request.headers.get('Accept-Encoding', '')
        if 'gzip' in accept.lower():
            body = json.dumps(payload, ensure_ascii=False).encode('utf-8')
            gz = gzip.compress(body, compresslevel=6)
            resp = Response(gz, mimetype='application/json')
            resp.headers['Content-Encoding'] = 'gzip'
            return resp
        return jsonify(payload)
    except Exception as e:
        logger.error(f"获取微信日志失败: {e}")
        return jsonify({"success": False, "error": str(e)}), 500


# Current logged-in WeChat user API
@system_bp.route('/api/current_user_info', methods=['GET'])
@login_required
def get_current_user_info():
    """Return the currently logged-in WeChat user's info as JSON.

    The payload is whatever ``dashboard_server.get_current_user_info()``
    returns; exceptions are not handled here and propagate to Flask.
    """
    user_info = current_app.dashboard_server.get_current_user_info()
    return jsonify(user_info)


@system_bp.route('/api/system/config/raw', methods=['GET'])
@login_required
def get_system_config_raw():
    """Return the raw text of the system config file plus known LLM targets.

    ``llm_backends`` is a sorted list of backend names and
    ``dify_app::<name>`` entries taken from the runtime LLM catalog. Failures
    are logged and reported as ``{"success": False, "message": ...}`` with
    HTTP 500.
    """
    try:
        config_path = _system_config_path()
        with open(config_path, 'r', encoding='utf-8') as f:
            config_text = f.read()

        # Expose the runtime catalog's targets (backends and dify apps) to
        # make scene bindings easier to debug.
        catalog = _load_llm_catalog_runtime()

        def _clean_name(entry):
            return str(entry.get("name") or "").strip()

        targets = [_clean_name(entry) for entry in (catalog.get("backends", []) or [])]
        targets += [
            f"dify_app::{_clean_name(entry)}"
            for entry in (catalog.get("dify_apps", []) or [])
        ]
        llm_targets = sorted(target for target in targets if target)

        return jsonify({
            "success": True,
            "data": config_text,
            "path": config_path,
            "llm_backends": llm_targets,
        })
    except Exception as e:
        logger.error(f"读取系统配置失败: {e}")
        return jsonify({"success": False, "message": str(e)}), 500


@system_bp.route('/api/system/config/update', methods=['POST'])
@login_required
def update_system_config():
    """Validate and save the raw YAML system config, then apply it at runtime.

    Expects a JSON object with a ``config_text`` string. Invalid input is
    rejected with HTTP 400 before anything is written: a non-object body, a
    missing or non-string ``config_text``, YAML that does not parse, or a
    document whose top level is neither empty nor a mapping. On success the
    text is written verbatim to the config file and the running robot (when
    present) reloads its configuration; otherwise only the LLM registry
    cache is invalidated. Unexpected failures are logged and reported with
    HTTP 500.
    """
    try:
        server = current_app.dashboard_server
        data = request.get_json() or {}
        if not isinstance(data, dict):
            return jsonify({"success": False, "message": "请求体必须是 JSON 对象"}), 400

        config_text = data.get("config_text")
        if config_text is None:
            return jsonify({"success": False, "message": "缺少配置内容"}), 400
        if not isinstance(config_text, str):
            return jsonify({"success": False, "message": "配置内容必须是字符串"}), 400

        # Parse before writing so a broken document never reaches disk. A
        # syntax error is the client's mistake, hence 400 rather than 500.
        try:
            parsed = yaml.safe_load(config_text)
        except yaml.YAMLError as e:
            logger.warning(f"系统配置 YAML 校验失败: {e}")
            return jsonify({"success": False, "message": f"YAML 格式错误: {e}"}), 400

        # The loader in this module treats the config as a mapping (an empty
        # document is read as {}), so a top-level scalar or list is rejected.
        if parsed is not None and not isinstance(parsed, dict):
            return jsonify({"success": False, "message": "配置顶层必须是键值映射"}), 400

        config_path = _system_config_path()
        with open(config_path, 'w', encoding='utf-8') as f:
            f.write(config_text)

        if getattr(server, "robot", None) and getattr(server.robot, "config", None):
            server.robot.config.reload()
            # Sync runtime objects right after saving so the new values are
            # picked up without restarting the process.
            server.robot.apply_runtime_config(reload_catalog=True)
        else:
            # Without a usable robot instance, at least drop the LLM routing
            # cache so later requests do not read stale values.
            LLMRegistry.invalidate_cache()

        return jsonify({"success": True, "message": "全局配置已保存并应用到运行时"})
    except Exception as e:
        logger.error(f"保存系统配置失败: {e}")
        return jsonify({"success": False, "message": str(e)}), 500


@system_bp.route('/api/system/llm_config', methods=['GET'])
@login_required
def get_system_llm_config():
    """Return the global LLM catalog with its topology as JSON.

    Providers, dify apps, backends and scenes are each sorted by ``name``;
    topology rows and plugin usages come from ``_build_llm_topology()``.
    Failures are logged and reported as ``{"success": False, "message": ...}``
    with HTTP 500.
    """
    try:
        catalog = _load_llm_catalog_runtime()

        def _sorted_by_name(section):
            entries = catalog.get(section, []) or []
            return sorted(entries, key=lambda entry: str(entry.get("name") or ""))

        ordered = {
            section: _sorted_by_name(section)
            for section in ("providers", "dify_apps", "backends", "scenes")
        }
        topology = _build_llm_topology()

        # The catalog's primary storage is MySQL; the string below only
        # describes where the data lives.
        storage_hint = (
            "mysql:t_llm_provider_templates + t_llm_dify_apps + "
            "t_llm_backends + t_llm_scenes (fallback yaml)"
        )
        body = {
            "default_scene": catalog.get("default_scene", ""),
            "providers": ordered["providers"],
            "dify_apps": ordered["dify_apps"],
            "backends": ordered["backends"],
            "scenes": ordered["scenes"],
            "topology_rows": topology.get("topology_rows", []),
            "plugin_usages": topology.get("plugin_usages", []),
            "config_path": storage_hint,
        }
        return jsonify({"success": True, "data": body})
    except Exception as e:
        logger.error(f"读取全局 LLM 配置失败: {e}")
        return jsonify({"success": False, "message": str(e)}), 500


@system_bp.route('/api/system/llm_runtime_analytics', methods=['GET'])
@login_required
def get_system_llm_runtime_analytics():
    """Return the recent-window LLM analytics as JSON.

    Design notes carried over from the original author: this endpoint is
    meant to be passive. It should not probe AI services or persist cost
    figures, only report what has already been recorded, so that refreshing
    the admin page adds no load on those services. The payload itself is
    built by ``_build_llm_runtime_analytics_payload()``.

    Failures are logged and reported as ``{"success": False, "message": ...}``
    with HTTP 500.
    """
    try:
        analytics = _build_llm_runtime_analytics_payload()
        return jsonify({"success": True, "data": analytics})
    except Exception as e:
        logger.error(f"读取 LLM 运行分析失败: {e}")
        return jsonify({"success": False, "message": str(e)}), 500


@system_bp.route('/api/system/llm_config', methods=['POST'])
@login_required
def update_system_llm_config():
    """Validate and save the global LLM catalog, then apply it at runtime.

    The JSON body may carry ``default_scene``, ``providers``, ``dify_apps``,
    ``backends`` and ``scenes``. Validation failures return HTTP 400 with a
    message: each collection must be a list, every named dify app needs an
    existing provider template and an ``app_key``, scene names must be unique,
    every named scene must point at an existing ``dify_app`` or ``backend``,
    and a non-empty default scene must exist. Entries that are not dicts or
    have no name are skipped by validation but still saved. Unexpected
    failures are logged and reported with HTTP 500.
    """
    def _reject(message):
        return jsonify({"success": False, "message": message}), 400

    def _entry_name(entry):
        return str((entry or {}).get("name") or "").strip()

    def _collect_names(entries):
        return {
            _entry_name(entry)
            for entry in entries
            if isinstance(entry, dict) and _entry_name(entry)
        }

    try:
        server = current_app.dashboard_server
        data = request.get_json() or {}
        default_scene = str(data.get("default_scene") or "").strip()
        provider_list = data.get("providers", []) or []
        dify_app_list = data.get("dify_apps", []) or []
        backend_list = data.get("backends", []) or []
        scene_list = data.get("scenes", []) or []

        shape_checks = (
            (provider_list, "providers 格式不正确"),
            (dify_app_list, "dify_apps 格式不正确"),
            (backend_list, "backends 格式不正确"),
            (scene_list, "scenes 格式不正确"),
        )
        for candidate, complaint in shape_checks:
            if not isinstance(candidate, list):
                return _reject(complaint)

        # Catalog-level validation: gather the name sets first so that
        # references from dify apps and scenes can be checked against them.
        provider_names = _collect_names(provider_list)
        dify_app_names = _collect_names(dify_app_list)
        backend_names = _collect_names(backend_list)

        for app in dify_app_list:
            app_name = str(app.get("name") or "").strip() if isinstance(app, dict) else ""
            if not app_name:
                continue
            provider_template = str(app.get("provider_template") or "").strip()
            if not provider_template:
                return _reject(f"Dify应用 {app_name} 未绑定 Provider 模板")
            if provider_template not in provider_names:
                return _reject(f"Dify应用 {app_name} 绑定的 Provider 不存在")
            if not str(app.get("app_key") or "").strip():
                return _reject(f"Dify应用 {app_name} 缺少 app_key")

        # A scene target is looked up in the name set matching its type.
        known_targets = {"dify_app": dify_app_names, "backend": backend_names}
        scene_names = set()
        for scene in scene_list:
            if not isinstance(scene, dict):
                continue
            scene_name = str(scene.get("name") or "").strip()
            if not scene_name:
                continue
            if scene_name in scene_names:
                return _reject(f"场景名重复: {scene_name}")
            scene_names.add(scene_name)

            target_type = str(scene.get("target_type") or "").strip().lower()
            target_ref = str(scene.get("target_ref") or "").strip()
            if target_type not in known_targets:
                return _reject(f"场景 {scene_name} target_type 非法")
            if not target_ref:
                return _reject(f"场景 {scene_name} 未绑定目标")
            if target_ref not in known_targets[target_type]:
                return _reject(f"场景 {scene_name} 绑定的 {target_type} 不存在")

        if default_scene and default_scene not in scene_names:
            return _reject("默认场景不存在")

        _save_llm_catalog_runtime({
            "default_scene": default_scene,
            "providers": provider_list,
            "dify_apps": dify_app_list,
            "backends": backend_list,
            "scenes": scene_list,
        })

        robot = getattr(server, "robot", None)
        if robot and getattr(server.robot, "config", None):
            server.robot.config.reload()
            # After saving the catalog, refresh the runtime so the next
            # plugin call uses the new catalog.
            server.robot.apply_runtime_config(reload_catalog=True)
        else:
            LLMRegistry.invalidate_cache()

        return jsonify({"success": True, "message": "全局 LLM 配置已保存并应用到运行时"})
    except Exception as e:
        logger.error(f"保存全局 LLM 配置失败: {e}")
        return jsonify({"success": False, "message": str(e)}), 500


@system_bp.route('/api/system/md2img_health', methods=['GET'])
@login_required
def get_md2img_health():
    """Return the Markdown-to-image runtime health snapshot as JSON.

    By default the state is only read. Passing ``ensure_runtime`` as one of
    ``1`` / ``true`` / ``yes`` / ``on`` (case-insensitive) forwards
    ``ensure_runtime=True`` to the snapshot helper. Failures are logged and
    reported as ``{"success": False, "message": ...}`` with HTTP 500.
    """
    truthy_values = {'1', 'true', 'yes', 'on'}
    try:
        raw_flag = request.args.get('ensure_runtime', 'false')
        wants_runtime = str(raw_flag).strip().lower() in truthy_values
        snapshot = get_md2img_health_snapshot(ensure_runtime=wants_runtime)
        return jsonify({"success": True, "data": snapshot})
    except Exception as e:
        logger.error(f"获取 md2img 健康状态失败: {e}")
        return jsonify({"success": False, "message": str(e)}), 500


@system_bp.route('/api/system/md2img_warmup', methods=['POST'])
@login_required
def trigger_md2img_warmup():
    """Manually trigger a warm-up of the Markdown-to-image browser.

    The optional JSON body may carry ``timeout_seconds``. Missing, empty,
    zero or unparsable values fall back to 45, and the result is clamped to
    the range 10..180. The response always includes a fresh health snapshot;
    a failed warm-up or an unexpected exception is reported with HTTP 500.
    """
    try:
        payload = request.get_json(silent=True) or {}
        if not isinstance(payload, dict):
            # A JSON array or scalar body carries no options; treat it like
            # an empty body, as is already done for malformed JSON.
            payload = {}

        # _safe_int tolerates strings such as "abc" or "45.5", which a plain
        # int() would turn into a 500 response.
        timeout_seconds = _safe_int(payload.get('timeout_seconds'), 45) or 45
        timeout_seconds = max(10, min(timeout_seconds, 180))

        ok = warmup_md2img_browser_sync(timeout_seconds=timeout_seconds)
        data = get_md2img_health_snapshot(ensure_runtime=False)
        if ok:
            return jsonify({
                "success": True,
                "message": f"预热完成（timeout={timeout_seconds}s）",
                "data": data,
            })
        return jsonify({
            "success": False,
            "message": f"预热失败（timeout={timeout_seconds}s），请查看运行日志",
            "data": data,
        }), 500
    except Exception as e:
        logger.error(f"触发 md2img 预热失败: {e}")
        return jsonify({"success": False, "message": str(e)}), 500


@system_bp.route('/api/restart_service', methods=['POST'])
@login_required
def restart_service():
    """Launch ``restart.sh`` from the project root to restart the service.

    The script is started with ``bash`` in its own session, with output
    discarded, and is not waited for. Returns HTTP 404 when the script is
    missing and HTTP 500 when launching it raises.
    """
    try:
        module_dir = os.path.dirname(__file__)
        project_root = os.path.abspath(os.path.join(module_dir, '..', '..', '..'))
        script_path = os.path.join(project_root, 'restart.sh')

        if os.path.exists(script_path):
            # start_new_session=True puts the script in a session of its own,
            # separate from this process.
            subprocess.Popen(
                ['bash', script_path],
                cwd=project_root,
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
                start_new_session=True,
            )
            logger.warning(f"后台触发服务重启脚本: {script_path}")
            return jsonify({
                "success": True,
                "message": "已触发重启脚本，服务将在短时间内重启",
            })

        return jsonify({"success": False, "message": f"未找到脚本: {script_path}"}), 404
    except Exception as e:
        logger.error(f"触发服务重启失败: {e}")
        return jsonify({"success": False, "message": str(e)}), 500