Files
abot/admin/dashboard/blueprints/system.py

1269 lines
52 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
from flask import Blueprint, render_template, jsonify, request, send_from_directory, current_app, Response
from .auth import login_required
from loguru import logger
import os
import time
import subprocess
from datetime import datetime
import platform
import psutil
from collections import deque
import gzip
import json
import yaml
import toml
from utils.markdown_to_image import get_md2img_health_snapshot, warmup_md2img_browser_sync
from utils.ai.llm_registry import LLMRegistry
from base.plugin_common.plugin_interface import PluginStatus
from utils.ai.unified_llm import UnifiedLLMClient
from utils.decorator.async_job import async_job
# Blueprint that groups the system-information pages and JSON APIs defined below.
system_bp = Blueprint('system', __name__)
# Wall-clock time captured when this module is imported; used as the
# application start time when reporting uptime.
APP_START_TIME = time.time()
def _system_config_path() -> str:
return os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..', '..', 'config.yaml'))
def _load_system_yaml() -> dict:
    """Load the system YAML configuration file.

    Returns:
        The parsed document, or an empty dict when the file does not exist
        or parses to a falsy value (e.g. an empty file).
    """
    path = _system_config_path()
    if os.path.exists(path):
        with open(path, 'r', encoding='utf-8') as stream:
            parsed = yaml.safe_load(stream)
        return parsed or {}
    return {}
def _save_system_yaml(config_obj: dict) -> None:
    """Serialize ``config_obj`` as YAML into the system config file, overwriting it.

    Unicode is written as-is and key order is preserved (no sorting).
    """
    dump_options = {"allow_unicode": True, "sort_keys": False}
    target_path = _system_config_path()
    with open(target_path, 'w', encoding='utf-8') as stream:
        yaml.safe_dump(config_obj, stream, **dump_options)
def _safe_int(value, default: int = 0) -> int:
"""把数据库 / Redis 返回的字符串数字安全转成整数。"""
try:
if value in (None, ""):
return default
return int(float(value))
except (TypeError, ValueError):
return default
def _safe_float(value, default: float = 0.0) -> float:
"""把数据库 / Redis 返回的值安全转成浮点数。"""
try:
if value in (None, ""):
return default
return float(value)
except (TypeError, ValueError):
return default
def _format_bytes_to_mb(value: int) -> float:
    """Convert a byte count to megabytes, rounded to two decimals for the home-page summary."""
    byte_count = _safe_float(value, 0.0)
    megabytes = byte_count / 1024 / 1024
    return round(megabytes, 2)
def _extract_mysql_runtime_snapshot(db_manager) -> dict:
"""采集 MySQL 运行态摘要。
首页目标不是替代 DBA 工具,而是让管理员一眼判断:
1. 数据库是不是活着;
2. 当前连接压力高不高;
3. 当前库规模是否已经明显变大;
4. 有没有必要继续深入到更专业的监控页排查。
"""
snapshot = {
"status": "healthy",
"summary": "连接正常",
# 这里不要假定 db_manager 一定实现了扩展 helper。
# 当前仓库存在多种 DBConnectionManager 版本,因此先走 getattr再在 SQL 查询里补足真实值。
"database": (
str(getattr(db_manager, "get_mysql_database_name", lambda: "")() or "").strip()
if db_manager is not None else ""
),
"version": "",
"threads_connected": 0,
"threads_running": 0,
"max_connections": 0,
"connection_usage_percent": 0.0,
"questions_per_second": 0.0,
"uptime_seconds": 0,
"table_count": 0,
"schema_size_mb": 0.0,
"slow_query_threshold_ms": (
int(getattr(db_manager, "get_slow_query_threshold_ms", lambda default=300: default)(300))
if db_manager is not None else 300
),
}
mysql_conn = db_manager.get_mysql_connection()
try:
with mysql_conn.cursor(dictionary=True) as cursor:
# 基础探活与版本识别:
# 1. SELECT VERSION() 成本极低;
# 2. 相比只做 SELECT 1它还能顺便拿到版本信息
# 3. 首页卡片里显示版本,方便线上排查“是不是某台库版本不一致”。
cursor.execute("SELECT VERSION() AS version, DATABASE() AS database_name")
version_row = cursor.fetchone() or {}
snapshot["version"] = str(version_row.get("version") or "").strip()
snapshot["database"] = str(version_row.get("database_name") or snapshot["database"] or "").strip()
cursor.execute(
"""
SHOW GLOBAL STATUS
WHERE Variable_name IN ('Threads_connected', 'Threads_running', 'Questions', 'Uptime')
"""
)
status_rows = cursor.fetchall() or []
status_map = {
str(row.get("Variable_name") or "").strip(): row.get("Value")
for row in status_rows
}
cursor.execute(
"""
SHOW GLOBAL VARIABLES
WHERE Variable_name IN ('max_connections')
"""
)
variable_rows = cursor.fetchall() or []
variable_map = {
str(row.get("Variable_name") or "").strip(): row.get("Value")
for row in variable_rows
}
# information_schema 聚合虽然比 SELECT 1 重一点,但仍属于轻量级元信息查询:
# 1. 只在首页 30 秒级刷新一次,成本可接受;
# 2. 能直接给出当前业务库表数量与体量变化;
# 3. 对判断“是不是消息表膨胀导致后台变慢”很有帮助。
cursor.execute(
"""
SELECT
COUNT(*) AS table_count,
COALESCE(SUM(data_length + index_length), 0) AS schema_size_bytes
FROM information_schema.tables
WHERE table_schema = DATABASE()
"""
)
schema_row = cursor.fetchone() or {}
snapshot["threads_connected"] = _safe_int(status_map.get("Threads_connected"))
snapshot["threads_running"] = _safe_int(status_map.get("Threads_running"))
snapshot["max_connections"] = _safe_int(variable_map.get("max_connections"))
snapshot["uptime_seconds"] = _safe_int(status_map.get("Uptime"))
total_questions = _safe_int(status_map.get("Questions"))
if snapshot["uptime_seconds"] > 0:
snapshot["questions_per_second"] = round(total_questions / snapshot["uptime_seconds"], 2)
if snapshot["max_connections"] > 0:
snapshot["connection_usage_percent"] = round(
(snapshot["threads_connected"] / snapshot["max_connections"]) * 100,
1,
)
snapshot["table_count"] = _safe_int(schema_row.get("table_count"))
snapshot["schema_size_mb"] = _format_bytes_to_mb(schema_row.get("schema_size_bytes"))
if snapshot["connection_usage_percent"] >= 80 or snapshot["threads_running"] >= 12:
snapshot["status"] = "warning"
snapshot["summary"] = (
f"连接压力偏高:已连接 {snapshot['threads_connected']} / {snapshot['max_connections']}"
f"运行中线程 {snapshot['threads_running']}"
)
else:
snapshot["summary"] = (
f"连接正常:已连接 {snapshot['threads_connected']} / {snapshot['max_connections'] or '-'}"
f"QPS {snapshot['questions_per_second']}"
)
return snapshot
except Exception as mysql_error:
snapshot["status"] = "danger"
snapshot["summary"] = f"MySQL 探测失败: {mysql_error}"
return snapshot
finally:
mysql_conn.close()
def _extract_redis_runtime_snapshot(db_manager) -> dict:
    """Collect a lightweight summary of the Redis runtime state.

    Args:
        db_manager: Object exposing ``get_redis_connection()`` and, optionally,
            a ``redis_config`` mapping from which the ``db`` index is read.

    Returns:
        dict: ``status`` (``healthy`` / ``warning`` / ``danger``), a
        human-readable ``summary`` and the collected metrics. Failures while
        connecting or querying produce a ``danger`` snapshot instead of
        propagating.
    """
    redis_config = getattr(db_manager, "redis_config", {}) or {}
    snapshot = {
        "status": "healthy",
        "summary": "连接正常",
        "db_index": _safe_int(redis_config.get("db", 0)),
        "key_count": 0,
        "connected_clients": 0,
        "blocked_clients": 0,
        "ops_per_sec": 0,
        "used_memory_human": "",
        "used_memory_peak_human": "",
        "memory_usage_percent": 0.0,
        "uptime_seconds": 0,
        "hit_rate_percent": 0.0,
    }
    try:
        redis_conn = db_manager.get_redis_connection()
        redis_conn.ping()
        info = redis_conn.info() or {}

        snapshot["key_count"] = _safe_int(redis_conn.dbsize())
        snapshot["connected_clients"] = _safe_int(info.get("connected_clients"))
        snapshot["blocked_clients"] = _safe_int(info.get("blocked_clients"))
        snapshot["ops_per_sec"] = _safe_int(info.get("instantaneous_ops_per_sec"))
        snapshot["used_memory_human"] = str(info.get("used_memory_human") or "").strip()
        snapshot["used_memory_peak_human"] = str(info.get("used_memory_peak_human") or "").strip()
        snapshot["uptime_seconds"] = _safe_int(info.get("uptime_in_seconds"))

        maxmemory = _safe_int(info.get("maxmemory"))
        used_memory = _safe_int(info.get("used_memory"))
        # The usage percentage stays at 0.0 when no positive maxmemory is reported.
        if maxmemory > 0:
            snapshot["memory_usage_percent"] = round((used_memory / maxmemory) * 100, 1)

        keyspace_hits = _safe_int(info.get("keyspace_hits"))
        keyspace_lookups = keyspace_hits + _safe_int(info.get("keyspace_misses"))
        if keyspace_lookups > 0:
            snapshot["hit_rate_percent"] = round((keyspace_hits / keyspace_lookups) * 100, 1)

        # Separators are explicit so the rendered text does not run the
        # numbers and the following labels together.
        metrics_text = (
            f"keys {snapshot['key_count']},"
            f"clients {snapshot['connected_clients']},ops/s {snapshot['ops_per_sec']}"
        )
        # Warn when any client is blocked or memory usage reaches 80%.
        if snapshot["blocked_clients"] > 0 or snapshot["memory_usage_percent"] >= 80:
            snapshot["status"] = "warning"
            snapshot["summary"] = f"缓存压力需关注:{metrics_text}"
        else:
            snapshot["summary"] = f"缓存正常:{metrics_text}"
        return snapshot
    except Exception as redis_error:
        snapshot["status"] = "danger"
        snapshot["summary"] = f"Redis 探测失败: {redis_error}"
        return snapshot
def _parse_snapshot_datetime(value: str | None) -> datetime | None:
"""把首页摘要里常用的时间字符串安全转换为 datetime。"""
text = str(value or "").strip()
if not text:
return None
try:
return datetime.strptime(text, "%Y-%m-%d %H:%M:%S")
except ValueError:
return None
def _count_enabled_runtime_items(items) -> int:
"""统计启用项数量。"""
rows = []
if isinstance(items, dict):
rows = list(items.values())
elif isinstance(items, list):
rows = list(items)
count = 0
for row in rows:
if not isinstance(row, dict):
continue
if "enabled" not in row or bool(row.get("enabled", True)):
count += 1
return count
def _extract_llm_catalog_summary() -> dict:
    """Summarise the LLM routing configuration for the home page.

    Uses the registry catalog when it is non-empty and otherwise falls back
    to the legacy ``llm`` config. Any exception is logged and an all-zero
    summary with ``has_routing=False`` is returned.
    """
    try:
        catalog = LLMRegistry.get_catalog() or {}
        if not catalog:
            # Legacy layout: only scenes / backends / default_backend exist.
            legacy_llm = LLMRegistry.get_llm_config() or {}
            legacy_scenes = legacy_llm.get("scenes", {}) or {}
            legacy_backends = legacy_llm.get("backends", {}) or {}
            legacy_default_backend = str(legacy_llm.get("default_backend") or "").strip()
            return {
                "provider_count": 0,
                "scene_count": len(legacy_scenes) if isinstance(legacy_scenes, dict) else 0,
                "target_count": len(legacy_backends) if isinstance(legacy_backends, dict) else 0,
                "default_scene": "",
                "default_backend": legacy_default_backend,
                "has_routing": bool(legacy_scenes) or bool(legacy_default_backend),
            }

        providers = catalog.get("providers", {}) or {}
        dify_apps = catalog.get("dify_apps", {}) or {}
        backends = catalog.get("backends", {}) or {}
        scenes = catalog.get("scenes", {}) or {}
        default_scene = str(catalog.get("default_scene") or "").strip()
        if default_scene:
            default_backend = str(LLMRegistry.get_scene_backend_name(default_scene) or "").strip()
        else:
            default_backend = ""
        enabled_scene_count = _count_enabled_runtime_items(scenes)
        return {
            "provider_count": _count_enabled_runtime_items(providers),
            "scene_count": enabled_scene_count,
            "target_count": _count_enabled_runtime_items(backends) + _count_enabled_runtime_items(dify_apps),
            "default_scene": default_scene,
            "default_backend": default_backend,
            "has_routing": enabled_scene_count > 0,
        }
    except Exception as llm_catalog_error:
        logger.warning(f"提取 LLM 路由摘要失败: {llm_catalog_error}")
        return {
            "provider_count": 0,
            "scene_count": 0,
            "target_count": 0,
            "default_scene": "",
            "default_backend": "",
            "has_routing": False,
        }
def _extract_ai_runtime_snapshot() -> dict:
    """Build the home-page summary of the LLM runtime state.

    Merges ``UnifiedLLMClient.get_runtime_snapshot()`` with the routing
    summary from ``_extract_llm_catalog_summary()`` and derives ``status`` and
    ``summary``:

    * no routing configured       -> ``warning``
    * no recorded calls           -> ``warning``
    * every recorded call failed  -> ``danger``
    * some failures / last_error  -> ``warning``
    * otherwise                   -> ``healthy``

    Returns:
        dict: The runtime snapshot enriched with routing counts, details of
        the last call, ``status`` and ``summary``.
    """
    runtime_snapshot = UnifiedLLMClient.get_runtime_snapshot() or {}
    last_call = dict(runtime_snapshot.get("last_call") or {})
    catalog_summary = _extract_llm_catalog_summary()

    total_calls = _safe_int(runtime_snapshot.get("total_calls"))
    failed_calls = _safe_int(runtime_snapshot.get("failed_calls"))
    success_rate = _safe_float(runtime_snapshot.get("success_rate"))
    avg_latency_ms = _safe_float(runtime_snapshot.get("avg_latency_ms"))
    last_error = str(runtime_snapshot.get("last_error") or "").strip()

    snapshot = {
        **runtime_snapshot,
        "last_call": last_call,
        "provider_count": catalog_summary.get("provider_count", 0),
        "scene_count": catalog_summary.get("scene_count", 0),
        "target_count": catalog_summary.get("target_count", 0),
        "default_scene": catalog_summary.get("default_scene", ""),
        "default_backend": catalog_summary.get("default_backend", ""),
        "has_routing": bool(catalog_summary.get("has_routing")),
        "last_provider": str(last_call.get("provider") or "").strip(),
        "last_backend": str(last_call.get("backend") or "").strip(),
        "last_scene": str(last_call.get("scene") or "").strip(),
        "last_model": str(last_call.get("model") or "").strip(),
        "last_timestamp": str(last_call.get("timestamp") or "").strip(),
        "last_latency_ms": _safe_float(last_call.get("latency_ms")),
        "last_error": last_error,
    }

    if not snapshot["has_routing"]:
        snapshot["status"] = "warning"
        snapshot["summary"] = "当前未发现完整的 LLM 路由配置,建议先检查默认场景与后端绑定"
        return snapshot

    if total_calls <= 0:
        snapshot["status"] = "warning"
        snapshot["summary"] = (
            f"已配置 {snapshot['scene_count']} 个场景、{snapshot['target_count']} 个目标,"
            "最近窗口内暂无统一 LLM 调用记录"
        )
        return snapshot

    # Shared tail of the three call-based summaries. The leading separator
    # keeps the success rate and the latency from running together.
    metrics_tail = f",成功率 {success_rate:.2f}%,平均耗时 {avg_latency_ms:.2f}ms"

    # total_calls is known to be positive from here on.
    if failed_calls >= total_calls:
        snapshot["status"] = "danger"
        snapshot["summary"] = f"最近 {total_calls} 次调用全部失败{metrics_tail}"
    elif failed_calls > 0 or last_error:
        snapshot["status"] = "warning"
        snapshot["summary"] = f"最近 {total_calls} 次调用中失败 {failed_calls} 次{metrics_tail}"
    else:
        snapshot["status"] = "healthy"
        snapshot["summary"] = f"最近 {total_calls} 次调用全部成功{metrics_tail}"
    return snapshot
def _extract_scheduler_runtime_snapshot() -> dict:
    """Aggregate the ``async_job`` runtime rows into the home-page scheduler summary.

    Returns:
        dict: Job counters, the earliest upcoming run among enabled jobs,
        details of the most recent failure, plus ``status`` and ``summary``.
    """
    jobs = async_job.get_jobs_snapshot()

    def _status_of(job) -> str:
        """Normalised ``last_status`` of a job row."""
        return str(job.get("last_status") or "").strip().lower()

    def _failure_moment(job):
        """Sort key for failures: ``updated_at``, else ``last_run_at``, else the minimum datetime."""
        return (
            _parse_snapshot_datetime(job.get("updated_at"))
            or _parse_snapshot_datetime(job.get("last_run_at"))
            or datetime.min
        )

    upcoming_runs = []
    failures = []
    system_job_count = 0
    plugin_job_count = 0
    for job in jobs:
        key = str(job.get("job_key") or "").strip()
        owner = str(job.get("owner_name") or "system").strip().lower()
        scheduled_at = _parse_snapshot_datetime(job.get("next_run_at"))
        # A job is a plugin job when its key has the plugin prefix or its
        # owner is anything other than "system".
        if owner == "system" and not key.startswith("plugin_schedule:"):
            system_job_count += 1
        else:
            plugin_job_count += 1
        if bool(job.get("enabled")) and scheduled_at:
            upcoming_runs.append(scheduled_at)
        if _status_of(job) in {"failed", "invalid_schedule"}:
            failures.append(job)

    # max() returns the first maximal row, matching a stable descending sort.
    latest_failure = max(failures, key=_failure_moment) if failures else {}

    invalid_jobs = sum(1 for job in jobs if _status_of(job) == "invalid_schedule")
    total_jobs = len(jobs)
    enabled_jobs = sum(1 for job in jobs if bool(job.get("enabled")))
    running_jobs = sum(1 for job in jobs if bool(job.get("running")))
    failed_jobs = len(failures)
    never_run_jobs = sum(1 for job in jobs if _status_of(job) == "never")

    next_run_at_text = ""
    if upcoming_runs:
        next_run_at_text = min(upcoming_runs).strftime("%Y-%m-%d %H:%M:%S")

    latest_failed_error = str(latest_failure.get("last_error") or "").strip()
    if len(latest_failed_error) > 120:
        # Truncate long errors to 117 characters plus an ellipsis.
        latest_failed_error = f"{latest_failed_error[:117]}..."

    snapshot = {
        "status": "healthy",
        "summary": "任务调度运行正常",
        "total_jobs": total_jobs,
        "enabled_jobs": enabled_jobs,
        "running_jobs": running_jobs,
        "failed_jobs": failed_jobs,
        "invalid_jobs": invalid_jobs,
        "paused_jobs": total_jobs - enabled_jobs,
        "never_run_jobs": never_run_jobs,
        "system_job_count": system_job_count,
        "plugin_job_count": plugin_job_count,
        "next_run_at": next_run_at_text,
        "latest_failed_job_name": str(latest_failure.get("name") or "").strip(),
        "latest_failed_error": latest_failed_error,
    }

    next_run_label = next_run_at_text or '暂未计算'
    # Checks are ordered by priority; the first match decides status and summary.
    if total_jobs <= 0:
        snapshot["status"] = "warning"
        snapshot["summary"] = "当前没有加载任何定时任务"
    elif invalid_jobs > 0:
        snapshot["status"] = "danger"
        snapshot["summary"] = f"发现 {invalid_jobs} 个任务调度配置非法,建议立即检查任务页"
    elif failed_jobs > 0:
        snapshot["status"] = "warning"
        snapshot["summary"] = (
            f"最近有 {failed_jobs} 个任务执行失败,"
            f"下一次执行 {next_run_label}"
        )
    elif enabled_jobs <= 0:
        snapshot["status"] = "warning"
        snapshot["summary"] = "任务已加载,但当前没有启用中的调度任务"
    elif running_jobs > 0:
        snapshot["summary"] = (
            f"当前有 {running_jobs} 个任务执行中,"
            f"下一次执行 {next_run_label}"
        )
    else:
        snapshot["summary"] = f"已启用 {enabled_jobs} 个任务,下一次执行 {next_run_label}"
    return snapshot
def _legacy_llm_to_catalog(legacy_llm: dict) -> dict:
"""把旧 llm(backends/scenes) 结构转换为新目录结构(仅用于兜底展示)。
说明:
1. 该转换不写库,只用于当目录表不可用时让后台页面仍可展示;
2. 规则与 DB bootstrap 一致dify backend 拆成 provider+dify_app其他保留为 backend。
"""
llm = legacy_llm or {}
old_backends = llm.get("backends", {}) or {}
old_scenes = llm.get("scenes", {}) or {}
default_backend = str(llm.get("default_backend") or "").strip()
providers = []
dify_apps = []
backends = []
scenes = []
dify_template_cfg = None
for backend in old_backends.values():
if isinstance(backend, dict) and str(backend.get("provider") or "").strip().lower() == "dify":
dify_template_cfg = dict(backend)
break
if dify_template_cfg:
providers.append(
{
"name": "dify_workflow_default",
"provider_type": "dify",
"enabled": True,
"config": {
"provider": "dify",
"api_base_url": dify_template_cfg.get("api_base_url", ""),
"endpoint": dify_template_cfg.get("endpoint", "workflows/run"),
"mode": dify_template_cfg.get("mode", "workflow"),
"response_mode": dify_template_cfg.get("response_mode", "blocking"),
"request_timeout": dify_template_cfg.get("request_timeout", 60),
"max_retries": dify_template_cfg.get("max_retries", 3),
"retry_delay_seconds": dify_template_cfg.get("retry_delay_seconds", 1.0),
},
}
)
for backend_name, backend_cfg in old_backends.items():
if not isinstance(backend_cfg, dict):
continue
provider = str(backend_cfg.get("provider") or "").strip().lower()
if provider == "dify":
dify_apps.append(
{
"name": str(backend_name),
"provider_template": "dify_workflow_default",
"app_key": str(backend_cfg.get("api_key") or "").strip(),
"workflow_output_key": str(backend_cfg.get("workflow_output_key") or "text").strip(),
"enabled": True,
"config": {
"endpoint": backend_cfg.get("endpoint", ""),
"mode": backend_cfg.get("mode", ""),
"response_mode": backend_cfg.get("response_mode", ""),
"request_timeout": backend_cfg.get("request_timeout", ""),
},
}
)
else:
backends.append(
{
"name": str(backend_name),
"enabled": True,
"config": dict(backend_cfg),
}
)
if isinstance(old_scenes, dict) and old_scenes:
for scene_name, backend_name in old_scenes.items():
scene_name = str(scene_name or "").strip()
backend_name = str(backend_name or "").strip()
if not scene_name or not backend_name:
continue
backend_cfg = old_backends.get(backend_name, {}) or {}
provider = str((backend_cfg or {}).get("provider") or "").strip().lower()
scenes.append(
{
"name": scene_name,
"target_type": "dify_app" if provider == "dify" else "backend",
"target_ref": backend_name,
"enabled": True,
}
)
elif default_backend:
default_cfg = old_backends.get(default_backend, {}) or {}
provider = str((default_cfg or {}).get("provider") or "").strip().lower()
scenes.append(
{
"name": "main.default",
"target_type": "dify_app" if provider == "dify" else "backend",
"target_ref": default_backend,
"enabled": True,
}
)
default_scene = scenes[0]["name"] if scenes else ""
return {
"default_scene": default_scene,
"providers": providers,
"dify_apps": dify_apps,
"backends": backends,
"scenes": scenes,
}
def _load_llm_catalog_runtime() -> dict:
    """Read the runtime LLM catalog, preferring the MySQL-backed model.

    Falls back to converting the legacy ``llm`` section of the YAML config
    when the catalog store is missing, fails, or returns no scenes.
    """
    try:
        catalog_db = getattr(current_app.dashboard_server, "llm_catalog_db", None)
        if catalog_db:
            stored_catalog = catalog_db.get_catalog() or {}
            if stored_catalog and stored_catalog.get("scenes"):
                return stored_catalog
    except Exception as e:
        logger.warning(f"从 MySQL 读取 LLM 目录失败,回退 YAML: {e}")

    # Fallback: present the YAML legacy llm section in catalog form.
    legacy_section = _load_system_yaml().get("llm", {}) or {}
    if isinstance(legacy_section, dict):
        return _legacy_llm_to_catalog(legacy_section)
    return _legacy_llm_to_catalog({})
def _save_llm_catalog_runtime(catalog: dict) -> None:
    """Persist the runtime LLM catalog through the server's ``llm_catalog_db``.

    Raises:
        RuntimeError: When ``llm_catalog_db`` is not available or
            ``save_catalog`` returns a falsy result.
    """
    catalog_db = getattr(current_app.dashboard_server, "llm_catalog_db", None)
    if not catalog_db:
        raise RuntimeError("llm_catalog_db 未初始化,无法保存 LLM 目录到 MySQL")
    saved = catalog_db.save_catalog(catalog or {})
    if saved:
        return
    raise RuntimeError("保存 LLM 目录到 MySQL 失败")
def _plugins_root_path() -> str:
"""返回插件根目录绝对路径。"""
return os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..', '..', 'plugins'))
def _scan_plugin_llm_usage() -> list:
    """Scan every plugin's ``config.toml`` and collect plugin -> LLM scene references.

    Notes:
        1. The scan is read-only; plugin configs are never rewritten here.
        2. Only ``scene`` keys are collected: at the document root, inside a
           top-level section, or one level deeper (e.g. ``Douyu.report_api``).
        3. The result feeds the "plugin -> scene -> backend" topology view.

    Returns:
        De-duplicated rows ``{"plugin", "section", "scene"}`` sorted by
        plugin and section; an empty list when the plugins directory is
        missing.
    """
    root_dir = _plugins_root_path()
    if not os.path.isdir(root_dir):
        return []

    collected = []

    def _record(plugin_name: str, section_name: str, payload) -> None:
        """Append a usage row when ``payload`` is a dict with a non-blank ``scene``."""
        scene_name = str(payload.get("scene") or "").strip() if isinstance(payload, dict) else ""
        if scene_name:
            collected.append({
                "plugin": plugin_name,
                "section": section_name,
                "scene": scene_name,
            })

    for entry in sorted(os.listdir(root_dir)):
        entry_dir = os.path.join(root_dir, entry)
        if not os.path.isdir(entry_dir):
            continue
        config_path = os.path.join(entry_dir, "config.toml")
        if not os.path.exists(config_path):
            continue
        try:
            parsed = toml.load(config_path) or {}
        except Exception as e:
            logger.warning(f"扫描插件 LLM 依赖失败: plugin={entry}, path={config_path}, error={e}")
            continue

        # Each top-level section first, then its directly nested tables.
        for section_name, section_value in parsed.items():
            if not isinstance(section_value, dict):
                continue
            _record(entry, str(section_name), section_value)
            for nested_name, nested_value in section_value.items():
                if isinstance(nested_value, dict):
                    _record(entry, f"{section_name}.{nested_name}", nested_value)
        # Finally a ``scene`` written directly at the document root.
        _record(entry, "__root__", parsed if isinstance(parsed, dict) else {})

    # De-duplicate on plugin + section + scene; the last row for a key wins.
    deduped = {}
    for row in collected:
        deduped[f"{row.get('plugin')}::{row.get('section')}::{row.get('scene')}"] = row
    return sorted(deduped.values(), key=lambda row: (row.get("plugin", ""), row.get("section", "")))
def _build_llm_topology() -> dict:
    """Build the LLM topology view used by the dashboard to show dependencies.

    Resolves each plugin scene reference against the runtime catalog and
    reports whether the scene and its target exist.

    Returns:
        dict with the catalog sections, ``plugin_usages`` and one
        ``topology_rows`` entry per plugin usage.
    """
    catalog = _load_llm_catalog_runtime()

    def _index_by_name(section: str) -> dict:
        """Map stripped item names to items for one catalog section."""
        return {
            str(item.get("name") or "").strip(): item
            for item in (catalog.get(section, []) or [])
        }

    providers = _index_by_name("providers")
    dify_apps = _index_by_name("dify_apps")
    backends = _index_by_name("backends")
    scenes = _index_by_name("scenes")
    default_scene = str(catalog.get("default_scene") or "").strip()
    plugin_usages = _scan_plugin_llm_usage()

    topology_rows = []
    for usage in plugin_usages:
        scene_name = str(usage.get("scene") or "").strip()
        scene = scenes.get(scene_name, {}) or {}
        target_type = str(scene.get("target_type") or "").strip().lower()
        target_ref = str(scene.get("target_ref") or "").strip()

        provider_label = ""
        target_exists = False
        if target_type == "dify_app":
            # A dify app is valid only when both the app and its provider template exist.
            app = dify_apps.get(target_ref, {}) or {}
            template_name = str(app.get("provider_template") or "").strip()
            template = providers.get(template_name, {}) or {}
            provider_label = str(template.get("provider_type") or "").strip()
            target_exists = bool(app and template)
        elif target_type == "backend":
            backend = backends.get(target_ref, {}) or {}
            backend_cfg = (backend.get("config") or {}) if isinstance(backend, dict) else {}
            provider_label = str((backend_cfg or {}).get("provider") or "").strip()
            target_exists = bool(backend)

        topology_rows.append({
            "plugin": usage.get("plugin", ""),
            "section": usage.get("section", ""),
            "scene": scene_name,
            "target_type": target_type or "-",
            "target_ref": target_ref or "-",
            "provider": provider_label or "-",
            "valid_scene": bool(scene_name in scenes),
            "valid_target": target_exists,
        })

    return {
        "default_scene": default_scene,
        "providers": catalog.get("providers", []) or [],
        "dify_apps": catalog.get("dify_apps", []) or [],
        "backends": catalog.get("backends", []) or [],
        "scenes": catalog.get("scenes", []) or [],
        "plugin_usages": plugin_usages,
        "topology_rows": topology_rows,
    }
@system_bp.route('/api_docs')
@login_required
def api_docs():
    """Render the API docs page.

    The target URL comes from the ``src`` query parameter, else from the
    robot's ``ipad_config["server_url"]``, else a localhost default.
    """
    # NOTE(review): ``src`` is taken from the query string unvalidated and
    # handed to the template; presumably it ends up in an iframe/link, so
    # consider restricting it to http(s) URLs. Confirm against the template.
    fallback_url = "http://127.0.0.1:8059/"
    src = request.args.get('src')
    if not src:
        try:
            robot = current_app.dashboard_server.robot
            ipad_cfg = getattr(robot, "ipad_config", {}) or {}
            src = ipad_cfg.get("server_url", fallback_url)
        except Exception:
            src = fallback_url
    return render_template('api_docs.html', src_url=src)
@system_bp.route('/system_status')
@login_required
def system_status():
    """Render the system status page.

    The target URL comes from the ``src`` query parameter, else it is built
    from the robot config's ``glances`` host/port, else a localhost default.
    """
    # NOTE(review): ``src`` is taken from the query string unvalidated and
    # handed to the template; consider restricting it to http(s) URLs.
    # Confirm against the template.
    src = request.args.get('src')
    if not src:
        try:
            robot = current_app.dashboard_server.robot
            glances = robot.config.glances if hasattr(robot, "config") else {}
            host = glances.get("host", "127.0.0.1")
            port = glances.get("port", 61208)
            src = f"http://{host}:{port}/"
        except Exception:
            src = "http://127.0.0.1:61208/"
    return render_template('system_status.html', src_url=src)
@system_bp.route('/system_llm')
@login_required
def system_llm():
    """Render the LLM configuration page."""
    template_name = 'system_llm.html'
    return render_template(template_name)
# Page routes
@system_bp.route('/wx_logs')
@login_required
def wx_logs():
    """Render the WeChat log viewer page."""
    template_name = 'wx_logs.html'
    return render_template(template_name)
# API routes
@system_bp.route('/api/system_info')
@login_required
def api_system_info():
    """Return basic host and process metrics as JSON.

    Responds with ``{"success": True, "data": {...}}`` or, on any failure,
    ``{"success": False, "error": ...}`` with HTTP 500.
    """
    try:
        system_info = {}
        system_info["os"] = platform.system()
        system_info["os_version"] = platform.version()
        system_info["python_version"] = platform.python_version()
        system_info["cpu_usage"] = psutil.cpu_percent()
        system_info["memory_usage"] = psutil.virtual_memory().percent
        system_info["disk_usage"] = psutil.disk_usage('/').percent
        # Uptime is measured from the application start time, not host boot.
        system_info["uptime"] = time.time() - APP_START_TIME
        system_info["timestamp"] = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        current_process = psutil.Process(os.getpid())
        system_info["open_files"] = len(current_process.open_files())
        return jsonify({"success": True, "data": system_info})
    except Exception as e:
        logger.error(f"获取系统信息失败: {e}")
        return jsonify({"success": False, "error": str(e)}), 500
@system_bp.route('/api/system_health_summary')
@login_required
def api_system_health_summary():
    """Aggregate the key health signals shown on the dashboard home page.

    Every section carries a ``status`` and a ready-made ``summary`` so the
    front end does not have to re-implement the rules. Any exception results
    in ``{"success": False, "error": ...}`` with HTTP 500.
    """
    try:
        server = current_app.dashboard_server
        robot = getattr(server, "robot", None)
        plugin_manager = getattr(server, "plugin_manager", None)
        plugin_map = getattr(plugin_manager, "plugins", {}) or {}

        # Plugin status counters: how many are loaded, actually running, or failing.
        plugin_counts = {
            "total": len(plugin_map),
            "running": 0,
            "loaded": 0,
            "stopped": 0,
            "error": 0,
            "unloaded": 0,
            "unknown": 0,
        }
        status_buckets = (
            ("RUNNING", "running"),
            ("LOADED", "loaded"),
            ("STOPPED", "stopped"),
            ("ERROR", "error"),
            ("UNLOADED", "unloaded"),
        )
        for plugin in plugin_map.values():
            current_status = getattr(plugin, "status", None)
            bucket = next(
                (
                    counter_key
                    for member_name, counter_key in status_buckets
                    if current_status == getattr(PluginStatus, member_name)
                ),
                "unknown",
            )
            plugin_counts[bucket] += 1

        # Reuse the existing stats store for the error count instead of new SQL.
        _, recent_error_count = server.stats_db.get_error_logs(days=1, page=1, limit=1)

        # Infrastructure health: summary-level probes of MySQL / Redis with a
        # few load indicators, not a deep inspection.
        mysql_snapshot = _extract_mysql_runtime_snapshot(server.db_manager)
        redis_snapshot = _extract_redis_runtime_snapshot(server.db_manager)

        robot_running = bool(getattr(robot, "ipad_running", False))
        robot_nickname = str(getattr(robot, "nickname", "") or "").strip()
        robot_wxid = str(getattr(robot, "wxid", "") or "").strip()
        if robot_running:
            robot_summary = "已连接并正在处理消息"
        else:
            robot_summary = "未连接或主循环未运行"
        robot_label = robot_nickname or robot_wxid
        if robot_label:
            robot_summary = f"{robot_summary} · {robot_label}"

        if plugin_counts["error"] > 0:
            plugin_status = "warning"
            plugin_summary = f"异常 {plugin_counts['error']} 个,运行中 {plugin_counts['running']} / {plugin_counts['total']}"
        elif plugin_counts["running"] == 0 and plugin_counts["total"] > 0:
            plugin_status = "warning"
            plugin_summary = f"暂无运行中插件,共加载 {plugin_counts['total']}"
        else:
            plugin_status = "healthy"
            plugin_summary = f"运行中 {plugin_counts['running']} / {plugin_counts['total']}"

        if recent_error_count > 0:
            error_status = "warning"
            error_summary = f"近 24 小时记录到 {recent_error_count} 条异常"
        else:
            error_status = "healthy"
            error_summary = "近 24 小时未记录到异常"

        # The AI card is passive observation only: runtime stats plus a routing summary.
        ai_runtime = _extract_ai_runtime_snapshot()
        # The scheduler card summarises the async_job runtime rows.
        scheduler_runtime = _extract_scheduler_runtime_snapshot()

        # Overall infrastructure state: the worst of the two probes wins.
        infra_states = {mysql_snapshot.get("status"), redis_snapshot.get("status")}
        if "danger" in infra_states:
            infra_status = "danger"
        elif "warning" in infra_states:
            infra_status = "warning"
        else:
            infra_status = "healthy"
        if infra_states == {"healthy"}:
            infra_summary = "MySQL / Redis 均正常"
        elif "danger" not in infra_states:
            infra_summary = "基础设施连接正常,但部分负载指标需要关注"
        else:
            infra_summary = "存在基础设施连接异常"

        data = {
            "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
            "robot": {
                "status": "healthy" if robot_running else "danger",
                "running": robot_running,
                "nickname": robot_nickname,
                "wxid": robot_wxid,
                "summary": robot_summary,
            },
            "plugins": {
                "status": plugin_status,
                "summary": plugin_summary,
                **plugin_counts,
            },
            "errors": {
                "status": error_status,
                "recent_24h_count": recent_error_count,
                "summary": error_summary,
            },
            "infrastructure": {
                "status": infra_status,
                "summary": infra_summary,
                "mysql": mysql_snapshot,
                "redis": redis_snapshot,
            },
            "ai_runtime": {**ai_runtime},
            "scheduler": {**scheduler_runtime},
        }
        return jsonify({"success": True, "data": data})
    except Exception as e:
        logger.error(f"获取系统健康摘要失败: {e}")
        return jsonify({"success": False, "error": str(e)}), 500
def _tail_log_lines(log_file: str, line_count: int, chunk_size: int = 8192) -> list:
    """Return the last ``line_count`` lines of ``log_file`` without line terminators.

    The file is read backwards in ``chunk_size`` blocks until more than
    ``line_count`` newlines have been seen. Chunks are joined once at the end,
    so multi-byte characters split across chunk boundaries decode correctly.
    """
    chunks = []
    newline_total = 0
    with open(log_file, 'rb') as f:
        f.seek(0, os.SEEK_END)
        pos = f.tell()
        while pos > 0 and newline_total <= line_count:
            read_size = min(chunk_size, pos)
            pos -= read_size
            f.seek(pos)
            chunk = f.read(read_size)
            chunks.append(chunk)
            newline_total += chunk.count(b'\n')
    data = b"".join(reversed(chunks))
    return [raw.decode('utf-8', errors='ignore') for raw in data.splitlines()[-line_count:]]


@system_bp.route('/api/wx_logs')
@login_required
def api_wx_logs():
    """Return the tail of one of the WeChat log files as JSON.

    Query parameters:
        type: ``error`` or ``debug``; anything else selects the info log.
        lines: Number of trailing lines to return (default 100, minimum 1).

    The JSON body is gzip-compressed when the client advertises gzip support.
    Any failure results in ``{"success": False, "error": ...}`` with HTTP 500.
    """
    try:
        log_type = request.args.get('type', 'info')
        lines = request.args.get('lines', 100, type=int)
        # Guard against zero/negative values: ``[-0:]`` would return the whole
        # buffer and a negative deque maxlen raises.
        # NOTE(review): there is no upper bound, so a very large ``lines``
        # reads the whole file into memory; consider capping it.
        lines = max(1, lines)

        logs_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..', '..', 'logs'))
        log_names = {'error': 'wx_error.log', 'debug': 'wx_debug.log'}
        log_file = os.path.join(logs_dir, log_names.get(log_type, 'wx_info.log'))

        log_content = []
        if os.path.exists(log_file):
            try:
                log_content = _tail_log_lines(log_file, lines)
            except Exception as e:
                logger.error(f"高效读取日志失败,回退到常规方式: {e}")
                with open(log_file, 'r', encoding='utf-8', errors='ignore') as f:
                    # Strip terminators so both paths return the same shape.
                    log_content = [line.rstrip('\r\n') for line in deque(f, lines)]
        else:
            logger.warning(f"日志文件不存在: {log_file}")
            # List the available log files to help with debugging.
            try:
                all_files = [name for name in os.listdir(logs_dir) if name.endswith('.log')]
                logger.info(f"项目根目录下的日志文件: {all_files}")
            except Exception as e:
                logger.error(f"列出目录文件失败: {e}")

        payload = {
            "success": True,
            "data": {
                "log_type": log_type,
                "log_file": log_file,
                "content": log_content,
                "lines": len(log_content)
            }
        }
        accept = request.headers.get('Accept-Encoding', '')
        if 'gzip' in accept.lower():
            body = json.dumps(payload, ensure_ascii=False).encode('utf-8')
            resp = Response(gzip.compress(body, compresslevel=6), mimetype='application/json')
            resp.headers['Content-Encoding'] = 'gzip'
            # The body depends on the request's Accept-Encoding header.
            resp.headers['Vary'] = 'Accept-Encoding'
            return resp
        return jsonify(payload)
    except Exception as e:
        logger.error(f"获取微信日志失败: {e}")
        return jsonify({"success": False, "error": str(e)}), 500
# Added alongside the existing routes
@system_bp.route('/api/current_user_info', methods=['GET'])
@login_required
def get_current_user_info():
    """Return information about the currently logged-in WeChat user as JSON."""
    user_info = current_app.dashboard_server.get_current_user_info()
    return jsonify(user_info)
@system_bp.route('/api/system/config/raw', methods=['GET'])
@login_required
def get_system_config_raw():
    """Return the raw text of the system config plus the known LLM target names.

    ``llm_backends`` lists backend names and ``dify_app::<name>`` entries
    from the runtime catalog, sorted, to help debug scene bindings.
    """
    try:
        config_path = _system_config_path()
        with open(config_path, 'r', encoding='utf-8') as f:
            config_text = f.read()

        catalog = _load_llm_catalog_runtime()
        target_names = [
            str(item.get("name") or "").strip()
            for item in (catalog.get("backends", []) or [])
        ]
        target_names += [
            f"dify_app::{str(item.get('name') or '').strip()}"
            for item in (catalog.get("dify_apps", []) or [])
        ]
        return jsonify({
            "success": True,
            "data": config_text,
            "path": config_path,
            "llm_backends": sorted(name for name in target_names if name),
        })
    except Exception as e:
        logger.error(f"读取系统配置失败: {e}")
        return jsonify({"success": False, "message": str(e)}), 500
@system_bp.route('/api/system/config/update', methods=['POST'])
@login_required
def update_system_config():
    """Validate and save the global YAML config, then apply it to the runtime.

    Request JSON:
        config_text (str): Full YAML document to write to the config file.

    Responses:
        200: Saved and applied.
        400: ``config_text`` is missing, is not a string, is not valid YAML,
             or does not parse to a top-level mapping.
        500: Any other failure (I/O, runtime reload).
    """
    try:
        server = current_app.dashboard_server
        data = request.get_json() or {}
        if not isinstance(data, dict):
            data = {}
        config_text = data.get("config_text")
        if config_text is None:
            return jsonify({"success": False, "message": "缺少配置内容"}), 400
        if not isinstance(config_text, str):
            return jsonify({"success": False, "message": "配置内容必须是字符串"}), 400

        # Validate BEFORE touching the file: opening with 'w' truncates it.
        try:
            parsed = yaml.safe_load(config_text)
        except yaml.YAMLError as yaml_error:
            logger.warning(f"保存系统配置失败: YAML 语法错误: {yaml_error}")
            return jsonify({"success": False, "message": f"YAML 语法错误: {yaml_error}"}), 400
        # An empty document, scalar or list parses fine but would replace the
        # config with something consumers calling ``.get(...)`` cannot read.
        if not isinstance(parsed, dict):
            return jsonify({"success": False, "message": "配置顶层必须是 YAML 映射(key: value)结构"}), 400

        config_path = _system_config_path()
        # Written in place rather than via a temp-file rename.
        with open(config_path, 'w', encoding='utf-8') as f:
            f.write(config_text)

        if getattr(server, "robot", None) and getattr(server.robot, "config", None):
            server.robot.config.reload()
            # Sync runtime-dependent objects right after saving so new values
            # are picked up without restarting the process.
            server.robot.apply_runtime_config(reload_catalog=True)
        else:
            # Without a usable robot instance, at least drop the LLM routing
            # cache so later requests do not read stale values.
            LLMRegistry.invalidate_cache()
        return jsonify({"success": True, "message": "全局配置已保存并应用到运行时"})
    except Exception as e:
        logger.error(f"保存系统配置失败: {e}")
        return jsonify({"success": False, "message": str(e)}), 500
@system_bp.route('/api/system/llm_config', methods=['GET'])
@login_required
def get_system_llm_config():
    """Return the runtime LLM catalog and topology as JSON.

    The ``providers``, ``dify_apps``, ``backends`` and ``scenes`` lists from
    the runtime catalog are each sorted by their ``name`` field. Topology
    rows and plugin usages come from ``_build_llm_topology()``. Any
    exception is logged and reported as HTTP 500 with its message.
    """
    try:
        catalog = _load_llm_catalog_runtime()

        def by_name(entry):
            # A missing or None name sorts as the empty string.
            return str(entry.get("name") or "")

        ordered_sections = {
            section: sorted(catalog.get(section, []) or [], key=by_name)
            for section in ("providers", "dify_apps", "backends", "scenes")
        }
        topology = _build_llm_topology()

        return jsonify({
            "success": True,
            "data": {
                "default_scene": catalog.get("default_scene", ""),
                **ordered_sections,
                "topology_rows": topology.get("topology_rows", []),
                "plugin_usages": topology.get("plugin_usages", []),
                # The new catalog model is stored primarily in MySQL.
                "config_path": (
                    "mysql:t_llm_provider_templates + t_llm_dify_apps + "
                    "t_llm_backends + t_llm_scenes (fallback yaml)"
                ),
            },
        })
    except Exception as exc:
        logger.error(f"读取全局 LLM 配置失败: {exc}")
        return jsonify({"success": False, "message": str(exc)}), 500
def _llm_catalog_entry_names(entries) -> set:
    """Collect the stripped, non-empty ``name`` of every dict in *entries*."""
    names = set()
    for entry in entries:
        if not isinstance(entry, dict):
            continue
        name = str(entry.get("name") or "").strip()
        if name:
            names.add(name)
    return names


def _find_llm_dify_app_error(dify_app_list, provider_names):
    """Return the first validation message for the Dify apps, or ``None``.

    Non-dict entries and entries without a name are skipped. Every named
    app must reference an existing provider template and carry an app_key.
    """
    for app in dify_app_list:
        if not isinstance(app, dict):
            continue
        app_name = str(app.get("name") or "").strip()
        if not app_name:
            continue
        provider_template = str(app.get("provider_template") or "").strip()
        if not provider_template:
            return f"Dify应用 {app_name} 未绑定 Provider 模板"
        if provider_template not in provider_names:
            return f"Dify应用 {app_name} 绑定的 Provider 不存在"
        if not str(app.get("app_key") or "").strip():
            return f"Dify应用 {app_name} 缺少 app_key"
    return None


def _find_llm_scene_error(scene_list, default_scene, dify_app_names, backend_names):
    """Return the first validation message for the scenes, or ``None``.

    Checks scene-name uniqueness, that ``target_type`` is ``dify_app`` or
    ``backend``, that ``target_ref`` names an existing target of that type,
    and finally that a non-empty *default_scene* is one of the scenes.
    """
    known_targets = {"dify_app": dify_app_names, "backend": backend_names}
    scene_names = set()
    for scene in scene_list:
        if not isinstance(scene, dict):
            continue
        scene_name = str(scene.get("name") or "").strip()
        if not scene_name:
            continue
        if scene_name in scene_names:
            return f"场景名重复: {scene_name}"
        scene_names.add(scene_name)

        target_type = str(scene.get("target_type") or "").strip().lower()
        target_ref = str(scene.get("target_ref") or "").strip()
        if target_type not in known_targets:
            return f"场景 {scene_name} target_type 非法"
        if not target_ref:
            return f"场景 {scene_name} 未绑定目标"
        if target_ref not in known_targets[target_type]:
            return f"场景 {scene_name} 绑定的 {target_type} 不存在"
    if default_scene and default_scene not in scene_names:
        return "默认场景不存在"
    return None


@system_bp.route('/api/system/llm_config', methods=['POST'])
@login_required
def update_system_llm_config():
    """Validate and save the global LLM catalog, then refresh the runtime.

    Expects a JSON object with optional ``default_scene`` (str) and the
    list-valued sections ``providers``, ``dify_apps``, ``backends`` and
    ``scenes``.

    Returns:
        200 with ``success: True`` after the catalog is saved and runtime
        caches are refreshed.
        400 when the body is not a JSON object, a section is not a list, or
        a cross-reference check fails (the message names the offender).
        500 for any other failure.
    """
    try:
        server = current_app.dashboard_server
        data = request.get_json() or {}
        if not isinstance(data, dict):
            return jsonify({"success": False, "message": "请求体格式不正确"}), 400

        default_scene = str(data.get("default_scene") or "").strip()
        catalog = {"default_scene": default_scene}
        for section in ("providers", "dify_apps", "backends", "scenes"):
            entries = data.get(section, []) or []
            if not isinstance(entries, list):
                return jsonify({"success": False, "message": f"{section} 格式不正确"}), 400
            catalog[section] = entries

        # Catalog-level validation: gather the name sets first so that the
        # references made by apps and scenes can be checked against them.
        provider_names = _llm_catalog_entry_names(catalog["providers"])
        dify_app_names = _llm_catalog_entry_names(catalog["dify_apps"])
        backend_names = _llm_catalog_entry_names(catalog["backends"])

        error = (
            _find_llm_dify_app_error(catalog["dify_apps"], provider_names)
            or _find_llm_scene_error(
                catalog["scenes"], default_scene, dify_app_names, backend_names
            )
        )
        if error:
            return jsonify({"success": False, "message": error}), 400

        # NOTE(review): entries skipped by validation (non-dict items, items
        # with a blank name) are still passed on unchanged, as before --
        # confirm _save_llm_catalog_runtime tolerates them.
        _save_llm_catalog_runtime(catalog)

        if getattr(server, "robot", None) and getattr(server.robot, "config", None):
            server.robot.config.reload()
            # After saving the catalog, proactively refresh the runtime so
            # the next plugin call uses the new catalog.
            server.robot.apply_runtime_config(reload_catalog=True)
        else:
            LLMRegistry.invalidate_cache()
        return jsonify({"success": True, "message": "全局 LLM 配置已保存并应用到运行时"})
    except Exception as e:
        logger.error(f"保存全局 LLM 配置失败: {e}")
        return jsonify({"success": False, "message": str(e)}), 500
@system_bp.route('/api/system/md2img_health', methods=['GET'])
@login_required
def get_md2img_health():
    """Report the health snapshot of the Markdown-to-image runtime.

    Query parameter ``ensure_runtime`` (default ``false``) is treated as
    true when it equals ``1``, ``true``, ``yes`` or ``on``, ignoring case
    and surrounding whitespace. The flag is forwarded to
    ``get_md2img_health_snapshot``. Any exception is logged and reported as
    HTTP 500 with its message.
    """
    try:
        # By default only read the status without starting the runtime.
        # Pass ensure_runtime=true to "refresh and also bring it up".
        raw_flag = request.args.get('ensure_runtime', 'false')
        normalized_flag = str(raw_flag).strip().lower()
        ensure_runtime = normalized_flag in {'1', 'true', 'yes', 'on'}

        snapshot = get_md2img_health_snapshot(ensure_runtime=ensure_runtime)
        return jsonify({"success": True, "data": snapshot})
    except Exception as exc:
        logger.error(f"获取 md2img 健康状态失败: {exc}")
        return jsonify({"success": False, "message": str(exc)}), 500
@system_bp.route('/api/system/md2img_warmup', methods=['POST'])
@login_required
def trigger_md2img_warmup():
    """Manually trigger a warm-up of the Markdown-to-image browser.

    Optional JSON body: ``{"timeout_seconds": <number>}``. A missing, zero,
    empty or unparsable value falls back to 45, and the result is clamped
    to the range 10..180 seconds. A body that is absent or not a JSON
    object is treated as empty.

    Returns:
        200 with the health snapshot when the warm-up succeeds.
        500 with the health snapshot when the warm-up reports failure, or
        with the exception message when something raises.
    """
    try:
        payload = request.get_json(silent=True) or {}
        if not isinstance(payload, dict):
            # The body is optional here, so treat any other JSON shape as
            # "no options" instead of crashing on .get().
            payload = {}

        # _safe_int absorbs values such as "abc" or "30.5" that a bare int()
        # would turn into an HTTP 500; `or 45` keeps 0 mapping to the default.
        timeout_seconds = _safe_int(payload.get('timeout_seconds'), 45) or 45
        timeout_seconds = max(10, min(timeout_seconds, 180))

        ok = warmup_md2img_browser_sync(timeout_seconds=timeout_seconds)
        data = get_md2img_health_snapshot(ensure_runtime=False)
        if ok:
            return jsonify({
                "success": True,
                "message": f"预热完成timeout={timeout_seconds}s",
                "data": data,
            })
        return jsonify({
            "success": False,
            "message": f"预热失败timeout={timeout_seconds}s请查看运行日志",
            "data": data,
        }), 500
    except Exception as e:
        logger.error(f"触发 md2img 预热失败: {e}")
        return jsonify({"success": False, "message": str(e)}), 500
@system_bp.route('/api/restart_service', methods=['POST'])
@login_required
def restart_service():
    """Launch ``restart.sh`` from the project root to restart the service.

    The script path is resolved three directories above this file. It is
    started with ``bash`` in a new session, with stdout and stderr
    discarded, and the request returns without waiting for it to finish.

    Returns:
        200 once the script has been launched.
        404 when ``restart.sh`` does not exist at the expected path.
        500 with the exception message when launching fails.
    """
    try:
        blueprint_dir = os.path.dirname(__file__)
        project_root = os.path.abspath(os.path.join(blueprint_dir, '..', '..', '..'))
        script_path = os.path.join(project_root, 'restart.sh')

        if os.path.exists(script_path):
            launch_options = {
                "cwd": project_root,
                "stdout": subprocess.DEVNULL,
                "stderr": subprocess.DEVNULL,
                # Run in a new session, separate from this process's session.
                "start_new_session": True,
            }
            subprocess.Popen(['bash', script_path], **launch_options)
            logger.warning(f"后台触发服务重启脚本: {script_path}")
            return jsonify({
                "success": True,
                "message": "已触发重启脚本,服务将在短时间内重启"
            })

        return jsonify({"success": False, "message": f"未找到脚本: {script_path}"}), 404
    except Exception as exc:
        logger.error(f"触发服务重启失败: {exc}")
        return jsonify({"success": False, "message": str(exc)}), 500