1520 lines
62 KiB
Python
1520 lines
62 KiB
Python
from flask import Blueprint, render_template, jsonify, request, send_from_directory, current_app, Response
|
||
from .auth import login_required
|
||
from loguru import logger
|
||
import os
|
||
import time
|
||
import subprocess
|
||
import socket
|
||
from datetime import datetime
|
||
import platform
|
||
import psutil
|
||
from collections import deque
|
||
import gzip
|
||
import json
|
||
import yaml
|
||
import toml
|
||
from utils.markdown_to_image import get_md2img_health_snapshot, warmup_md2img_browser_sync
|
||
from utils.ai.llm_registry import LLMRegistry
|
||
from base.plugin_common.plugin_interface import PluginStatus
|
||
from utils.ai.unified_llm import UnifiedLLMClient
|
||
from utils.decorator.async_job import async_job
|
||
|
||
# Blueprint that groups the system-information routes.
system_bp = Blueprint('system', __name__)

# Wall-clock time captured when this module is imported; used as the application start time.
APP_START_TIME = time.time()
|
||
# Most recent network-counter sample, used to estimate upload/download rates on the
# resource monitoring page. Sampling is deliberately lightweight and page-driven:
# 1. No extra daemon thread is started, so showing rates adds no resident background task;
# 2. Rates are only computed when a user refreshes/polls the resource page, so the cost is near zero;
# 3. If the process restarts and this cache is lost, only the first rate reading shows 0;
#    overall availability is unaffected.
NETWORK_IO_SAMPLE = {
    "timestamp": 0.0,
    "bytes_sent": 0,
    "bytes_recv": 0,
}
|
||
|
||
# The resource monitoring page hides, by default, system mounts that say little about
# day-to-day capacity:
# 1. `squashfs` is essentially the read-only images mounted by Ubuntu / Snap;
# 2. `/dev/loop*` devices are mostly image loop devices; they look 100% full but do not
#    mean a real disk is full;
# 3. Pseudo filesystems such as `/proc` / `/sys` / `/dev` reflect kernel runtime state
#    and do not belong on an operations home page.
IGNORED_DISK_FSTYPES = {
    "squashfs",
    "proc",
    "sysfs",
    "devtmpfs",
    "devfs",
    "securityfs",
    "cgroup",
    "cgroup2",
    "pstore",
    "autofs",
    "mqueue",
    "hugetlbfs",
    "debugfs",
    "tracefs",
    "configfs",
    "fusectl",
    "rpc_pipefs",
    "tmpfs",
}
# Mount points starting with any of these prefixes are hidden.
IGNORED_DISK_MOUNTPOINT_PREFIXES = (
    "/snap/",
    "/proc",
    "/sys",
    "/dev",
    "/run/",
    "/var/lib/snapd/",
)
# Partitions whose device name starts with any of these prefixes are hidden.
IGNORED_DISK_DEVICE_PREFIXES = (
    "/dev/loop",
)
|
||
|
||
|
||
def _system_config_path() -> str:
    """Return the absolute path of ``config.yaml`` three directories above this module."""
    module_dir = os.path.dirname(__file__)
    relative_path = os.path.join(module_dir, '..', '..', '..', 'config.yaml')
    return os.path.abspath(relative_path)
|
||
|
||
|
||
def _load_system_yaml() -> dict:
    """Load the system YAML configuration as a dictionary.

    Returns:
        The parsed top-level mapping. An empty dict is returned when the file
        does not exist, is empty, or its top-level node is not a mapping, so
        callers can always use ``.get`` on the result.
    """
    config_path = _system_config_path()
    try:
        # Open directly instead of checking existence first: this avoids the
        # race where the file disappears between the check and the open.
        with open(config_path, 'r', encoding='utf-8') as f:
            loaded = yaml.safe_load(f)
    except FileNotFoundError:
        return {}
    # A YAML document may legally be a list or scalar at the top level; the
    # declared contract of this helper is a dict.
    return loaded if isinstance(loaded, dict) else {}
|
||
|
||
|
||
def _save_system_yaml(config_obj: dict) -> None:
    """Serialize ``config_obj`` as YAML and overwrite the system configuration file.

    Key order is kept as given (``sort_keys=False``) and non-ASCII text is
    written unescaped (``allow_unicode=True``).
    """
    target_path = _system_config_path()
    with open(target_path, mode='w', encoding='utf-8') as stream:
        yaml.safe_dump(
            config_obj,
            stream,
            allow_unicode=True,
            sort_keys=False,
        )
|
||
|
||
|
||
def _safe_int(value, default: int = 0) -> int:
    """Safely convert a value (for example a numeric string from a database or Redis) to ``int``.

    Args:
        value: The value to convert. ``None`` and ``""`` yield ``default``.
        default: Returned when the value is empty or cannot be converted.

    Returns:
        The integer value. Fractional inputs such as ``"3.7"`` are truncated
        towards zero; non-numeric or non-finite inputs yield ``default``.
    """
    try:
        if value in (None, ""):
            return default
        if isinstance(value, int):
            # Already integral: skip the float round-trip, which loses
            # precision above 2**53.
            return int(value)
        if isinstance(value, (str, bytes, bytearray)):
            try:
                # Integer text is parsed exactly; anything else falls through
                # to the float path below (e.g. "3.7", "1e3").
                return int(value)
            except ValueError:
                pass
        return int(float(value))
    except (TypeError, ValueError, OverflowError):
        # OverflowError covers int(float("inf")), which the float path raises.
        return default
|
||
|
||
|
||
def _safe_float(value, default: float = 0.0) -> float:
    """Safely convert a value (for example one returned by a database or Redis) to ``float``.

    Args:
        value: The value to convert. ``None`` and ``""`` yield ``default``.
        default: Returned when the value is empty or cannot be converted.

    Returns:
        The float value, or ``default`` when conversion fails.
    """
    try:
        if value in (None, ""):
            return default
        return float(value)
    except (TypeError, ValueError, OverflowError):
        # OverflowError is raised for integers too large for a float
        # (e.g. 10 ** 400); treat it like any other unconvertible input.
        return default
|
||
|
||
|
||
def _format_bytes_to_mb(value: int) -> float:
    """Convert a byte count to MB (bytes / 1024 / 1024), rounded to two decimals.

    Values that cannot be converted to a number are treated as 0.
    """
    byte_count = _safe_float(value, 0.0)
    kib = byte_count / 1024
    return round(kib / 1024, 2)
|
||
|
||
|
||
def _safe_divide(numerator: float, denominator: float, default: float = 0.0) -> float:
    """Divide ``numerator`` by ``denominator`` without ever raising.

    Returns ``default`` when the denominator is falsy (zero, ``None``, ...) or
    when the division itself fails, so rate and ratio calculations are never
    interrupted by a division error.
    """
    try:
        return numerator / denominator if denominator else default
    except Exception:
        return default
|
||
|
||
|
||
def _primary_disk_path() -> str:
    """Return the root path of the primary disk on the current platform.

    The path is derived from ``os.sep`` rather than hard-coded as "/", so the
    resource monitoring page works both on Linux (where this is "/") and in a
    local Windows development environment (where it resolves to the root of
    the current drive).
    """
    separator = os.sep
    return os.path.abspath(separator)
|
||
|
||
|
||
def _format_datetime_text(timestamp_value: float | int | None) -> str:
|
||
"""把时间戳格式化为后台页面可直接展示的文本。"""
|
||
if not timestamp_value:
|
||
return "-"
|
||
try:
|
||
return datetime.fromtimestamp(float(timestamp_value)).strftime("%Y-%m-%d %H:%M:%S")
|
||
except Exception:
|
||
return "-"
|
||
|
||
|
||
def _should_ignore_disk_partition(partition) -> bool:
    """Decide whether a mounted partition should be hidden from the disk list.

    Args:
        partition: An object exposing ``mountpoint``, ``device`` and ``fstype``
            attributes; missing or empty attributes are treated as "".

    Returns:
        True when the filesystem type is in ``IGNORED_DISK_FSTYPES``, the mount
        point falls under one of ``IGNORED_DISK_MOUNTPOINT_PREFIXES``, or the
        device name starts with one of ``IGNORED_DISK_DEVICE_PREFIXES``.
    """
    mountpoint = str(getattr(partition, "mountpoint", "") or "").strip()
    device = str(getattr(partition, "device", "") or "").strip()
    fstype = str(getattr(partition, "fstype", "") or "").strip().lower()

    # These rules target Linux pseudo filesystems; Windows partitions normally
    # match none of them, which keeps the helper cross-platform.
    if fstype in IGNORED_DISK_FSTYPES:
        return True

    for prefix in IGNORED_DISK_MOUNTPOINT_PREFIXES:
        if prefix.endswith("/"):
            # A prefix with a trailing slash already ends on a path boundary.
            if mountpoint.startswith(prefix):
                return True
        elif mountpoint == prefix or mountpoint.startswith(prefix + "/"):
            # Bare prefixes such as "/dev" must match whole path components
            # only, so real data mounts like "/devdata" are not hidden.
            return True

    # Device names are matched as plain string prefixes ("/dev/loop" -> "/dev/loop0").
    return device.startswith(tuple(IGNORED_DISK_DEVICE_PREFIXES))
|
||
|
||
|
||
def _sample_network_speed() -> dict:
    """Estimate upload/download rates from two consecutive page-driven samples.

    The current ``psutil`` network counters are compared with the previous
    sample stored in ``NETWORK_IO_SAMPLE``; the stored sample is then replaced
    by the current one. Rates are 0 on the first call (no previous sample) and
    are never negative.

    Returns:
        A dict with ``bytes_sent``, ``bytes_recv``, ``upload_speed_bps`` and
        ``download_speed_bps`` (rates are bytes per second, rounded to two decimals).
    """
    io_counters = psutil.net_io_counters()
    sampled_at = time.time()
    sent_now = _safe_int(getattr(io_counters, "bytes_sent", 0))
    recv_now = _safe_int(getattr(io_counters, "bytes_recv", 0))
    previous_at = _safe_float(NETWORK_IO_SAMPLE.get("timestamp"))
    window = max(sampled_at - previous_at, 0.0)

    up_rate = 0.0
    down_rate = 0.0
    if window > 0 and previous_at > 0:
        sent_delta = sent_now - _safe_int(NETWORK_IO_SAMPLE.get("bytes_sent"))
        recv_delta = recv_now - _safe_int(NETWORK_IO_SAMPLE.get("bytes_recv"))
        up_rate = _safe_divide(sent_delta, window, 0.0)
        down_rate = _safe_divide(recv_delta, window, 0.0)
    # Clamp to zero so a counter that went backwards never yields a negative rate.
    up_rate = max(up_rate, 0.0)
    down_rate = max(down_rate, 0.0)

    # Remember this sample as the baseline for the next call.
    NETWORK_IO_SAMPLE.update(
        timestamp=sampled_at,
        bytes_sent=sent_now,
        bytes_recv=recv_now,
    )

    return {
        "bytes_sent": sent_now,
        "bytes_recv": recv_now,
        "upload_speed_bps": round(up_rate, 2),
        "download_speed_bps": round(down_rate, 2),
    }
|
||
|
||
|
||
def _current_process_handle():
    """Return a ``psutil.Process`` for this interpreter that is reused across calls.

    ``Process.cpu_percent(interval=None)`` measures usage since the previous
    call on the same object and returns a meaningless 0.0 on the first call,
    so creating a fresh ``Process`` for every snapshot would always report 0.0.
    The handle is rebuilt when the PID changes (for example after a fork).
    """
    pid = os.getpid()
    handle = getattr(_current_process_handle, "_handle", None)
    if handle is None or handle.pid != pid:
        handle = psutil.Process(pid)
        _current_process_handle._handle = handle
    return handle


def _extract_server_runtime_snapshot() -> dict:
    """Build the lightweight server runtime snapshot used by the resource monitoring page.

    The snapshot deliberately covers only what is most useful for day-to-day
    observation: host resources (CPU / memory / disk / network), the current
    application process, and the MySQL / Redis summaries obtained through
    ``current_app.dashboard_server.db_manager``. Everything is collected
    in-process via ``psutil``.

    Returns:
        A nested dict with the keys ``timestamp``, ``server``, ``cpu``,
        ``memory``, ``disk``, ``network``, ``process`` and ``infrastructure``.
    """
    server = current_app.dashboard_server
    current_process = _current_process_handle()
    virtual_memory = psutil.virtual_memory()
    swap_memory = psutil.swap_memory()
    cpu_usage = psutil.cpu_percent(interval=None)
    # The first reading after the process handle is created is 0.0; later
    # calls report usage since the previous snapshot.
    process_cpu_usage = current_process.cpu_percent(interval=None)
    boot_time = psutil.boot_time()
    network_sample = _sample_network_speed()
    disk_io = psutil.disk_io_counters()
    try:
        load_values = os.getloadavg()
    except (AttributeError, OSError):
        # os.getloadavg is unavailable on some platforms (e.g. Windows).
        load_values = (0.0, 0.0, 0.0)

    disk_items = []
    hidden_disk_items_count = 0
    seen_mountpoints = set()
    for partition in psutil.disk_partitions(all=False):
        mountpoint = str(getattr(partition, "mountpoint", "") or "").strip()
        if not mountpoint or mountpoint in seen_mountpoints:
            continue
        seen_mountpoints.add(mountpoint)
        if _should_ignore_disk_partition(partition):
            hidden_disk_items_count += 1
            continue
        try:
            usage = psutil.disk_usage(mountpoint)
        except Exception:
            # Best effort: a mount that cannot be inspected is left out of the list.
            continue
        disk_items.append({
            "device": str(getattr(partition, "device", "") or "").strip() or mountpoint,
            "mountpoint": mountpoint,
            "fstype": str(getattr(partition, "fstype", "") or "").strip(),
            "total_bytes": _safe_int(getattr(usage, "total", 0)),
            "used_bytes": _safe_int(getattr(usage, "used", 0)),
            "free_bytes": _safe_int(getattr(usage, "free", 0)),
            "usage_percent": round(_safe_float(getattr(usage, "percent", 0.0)), 1),
        })
    # Fullest disks first, so the most urgent entries survive the [:8] cut below.
    disk_items.sort(key=lambda item: item.get("usage_percent", 0.0), reverse=True)

    primary_disk_usage = psutil.disk_usage(_primary_disk_path())
    process_memory = current_process.memory_info()
    try:
        open_files = len(current_process.open_files())
    except Exception:
        open_files = 0

    try:
        # psutil 6.0 renamed Process.connections() to net_connections();
        # prefer the new name and fall back for older psutil releases.
        list_connections = getattr(current_process, "net_connections", None) or current_process.connections
        tcp_connections = list_connections(kind="inet")
        established_connections = sum(
            1 for conn in tcp_connections if str(getattr(conn, "status", "") or "").upper() == "ESTABLISHED"
        )
    except Exception:
        established_connections = 0

    # Read once so create_time and the derived uptime are consistent.
    process_create_time = current_process.create_time()
    now = time.time()

    return {
        "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
        "server": {
            "hostname": socket.gethostname(),
            "os": platform.system(),
            "os_version": platform.version(),
            "python_version": platform.python_version(),
            "boot_time": _format_datetime_text(boot_time),
            "uptime_seconds": round(max(now - boot_time, 0), 2),
        },
        "cpu": {
            "usage_percent": round(cpu_usage, 1),
            "logical_count": psutil.cpu_count(logical=True) or 0,
            "physical_count": psutil.cpu_count(logical=False) or 0,
            "load_1": round(_safe_float(load_values[0]), 2),
            "load_5": round(_safe_float(load_values[1]), 2),
            "load_15": round(_safe_float(load_values[2]), 2),
        },
        "memory": {
            "usage_percent": round(_safe_float(getattr(virtual_memory, "percent", 0.0)), 1),
            "total_bytes": _safe_int(getattr(virtual_memory, "total", 0)),
            "used_bytes": _safe_int(getattr(virtual_memory, "used", 0)),
            "available_bytes": _safe_int(getattr(virtual_memory, "available", 0)),
            "swap_usage_percent": round(_safe_float(getattr(swap_memory, "percent", 0.0)), 1),
            "swap_total_bytes": _safe_int(getattr(swap_memory, "total", 0)),
            "swap_used_bytes": _safe_int(getattr(swap_memory, "used", 0)),
        },
        "disk": {
            "primary_usage_percent": round(_safe_float(getattr(primary_disk_usage, "percent", 0.0)), 1),
            "primary_total_bytes": _safe_int(getattr(primary_disk_usage, "total", 0)),
            "primary_used_bytes": _safe_int(getattr(primary_disk_usage, "used", 0)),
            "io_read_bytes": _safe_int(getattr(disk_io, "read_bytes", 0)) if disk_io else 0,
            "io_write_bytes": _safe_int(getattr(disk_io, "write_bytes", 0)) if disk_io else 0,
            "hidden_virtual_mount_count": hidden_disk_items_count,
            "items": disk_items[:8],
        },
        "network": {
            **network_sample,
            "established_connections": established_connections,
        },
        "process": {
            "pid": current_process.pid,
            "cpu_percent": round(process_cpu_usage, 1),
            "memory_percent": round(current_process.memory_percent(), 2),
            "memory_rss_bytes": _safe_int(getattr(process_memory, "rss", 0)),
            "thread_count": current_process.num_threads(),
            "open_files": open_files,
            "create_time": _format_datetime_text(process_create_time),
            "uptime_seconds": round(max(now - process_create_time, 0), 2),
        },
        "infrastructure": {
            "mysql": _extract_mysql_runtime_snapshot(server.db_manager),
            "redis": _extract_redis_runtime_snapshot(server.db_manager),
        },
    }
|
||
|
||
|
||
def _extract_mysql_runtime_snapshot(db_manager) -> dict:
    """Collect a MySQL runtime summary for the dashboard.

    The goal is not to replace DBA tooling but to let an administrator see at
    a glance whether the database is alive, how high the connection pressure
    is, how large the current schema has become, and whether a deeper
    investigation is warranted.

    Args:
        db_manager: Object providing ``get_mysql_connection()``. The optional
            helpers ``get_mysql_database_name()`` and
            ``get_slow_query_threshold_ms(default)`` are used when present.

    Returns:
        A snapshot dict. ``status`` is ``"healthy"``, ``"warning"`` (connection
        usage >= 80% or >= 12 running threads) or ``"danger"`` when the probe
        fails for any reason, including failure to obtain a connection.
    """
    snapshot = {
        "status": "healthy",
        "summary": "连接正常",
        # Do not assume db_manager implements the optional helpers: look them
        # up with getattr and let the SQL queries below fill in real values.
        "database": (
            str(getattr(db_manager, "get_mysql_database_name", lambda: "")() or "").strip()
            if db_manager is not None else ""
        ),
        "version": "",
        "threads_connected": 0,
        "threads_running": 0,
        "max_connections": 0,
        "connection_usage_percent": 0.0,
        "questions_per_second": 0.0,
        "uptime_seconds": 0,
        "table_count": 0,
        "schema_size_mb": 0.0,
        "slow_query_threshold_ms": (
            int(getattr(db_manager, "get_slow_query_threshold_ms", lambda default=300: default)(300))
            if db_manager is not None else 300
        ),
    }

    mysql_conn = None
    try:
        # Acquire the connection inside the try block so that an unreachable
        # database (or a missing db_manager) is reported as a "danger"
        # snapshot instead of raising out of the dashboard.
        mysql_conn = db_manager.get_mysql_connection()
        with mysql_conn.cursor(dictionary=True) as cursor:
            # Liveness probe plus version detection: SELECT VERSION() is very
            # cheap and also yields the version shown on the dashboard card.
            cursor.execute("SELECT VERSION() AS version, DATABASE() AS database_name")
            version_row = cursor.fetchone() or {}
            snapshot["version"] = str(version_row.get("version") or "").strip()
            snapshot["database"] = str(version_row.get("database_name") or snapshot["database"] or "").strip()

            cursor.execute(
                """
                SHOW GLOBAL STATUS
                WHERE Variable_name IN ('Threads_connected', 'Threads_running', 'Questions', 'Uptime')
                """
            )
            status_rows = cursor.fetchall() or []
            status_map = {
                str(row.get("Variable_name") or "").strip(): row.get("Value")
                for row in status_rows
            }

            cursor.execute(
                """
                SHOW GLOBAL VARIABLES
                WHERE Variable_name IN ('max_connections')
                """
            )
            variable_rows = cursor.fetchall() or []
            variable_map = {
                str(row.get("Variable_name") or "").strip(): row.get("Value")
                for row in variable_rows
            }

            # Metadata aggregation over information_schema: table count and
            # total data + index size of the current schema.
            cursor.execute(
                """
                SELECT
                    COUNT(*) AS table_count,
                    COALESCE(SUM(data_length + index_length), 0) AS schema_size_bytes
                FROM information_schema.tables
                WHERE table_schema = DATABASE()
                """
            )
            schema_row = cursor.fetchone() or {}

            snapshot["threads_connected"] = _safe_int(status_map.get("Threads_connected"))
            snapshot["threads_running"] = _safe_int(status_map.get("Threads_running"))
            snapshot["max_connections"] = _safe_int(variable_map.get("max_connections"))
            snapshot["uptime_seconds"] = _safe_int(status_map.get("Uptime"))
            total_questions = _safe_int(status_map.get("Questions"))
            if snapshot["uptime_seconds"] > 0:
                # Average over the whole server uptime, not an instantaneous rate.
                snapshot["questions_per_second"] = round(total_questions / snapshot["uptime_seconds"], 2)
            if snapshot["max_connections"] > 0:
                snapshot["connection_usage_percent"] = round(
                    (snapshot["threads_connected"] / snapshot["max_connections"]) * 100,
                    1,
                )
            snapshot["table_count"] = _safe_int(schema_row.get("table_count"))
            snapshot["schema_size_mb"] = _format_bytes_to_mb(schema_row.get("schema_size_bytes"))

            if snapshot["connection_usage_percent"] >= 80 or snapshot["threads_running"] >= 12:
                snapshot["status"] = "warning"
                snapshot["summary"] = (
                    f"连接压力偏高:已连接 {snapshot['threads_connected']} / {snapshot['max_connections']},"
                    f"运行中线程 {snapshot['threads_running']}"
                )
            else:
                snapshot["summary"] = (
                    f"连接正常:已连接 {snapshot['threads_connected']} / {snapshot['max_connections'] or '-'},"
                    f"QPS {snapshot['questions_per_second']}"
                )
            return snapshot
    except Exception as mysql_error:
        snapshot["status"] = "danger"
        snapshot["summary"] = f"MySQL 探测失败: {mysql_error}"
        return snapshot
    finally:
        # Only close what was actually opened.
        if mysql_conn is not None:
            mysql_conn.close()
|
||
|
||
|
||
def _extract_redis_runtime_snapshot(db_manager) -> dict:
    """Collect a Redis runtime summary for the dashboard.

    Args:
        db_manager: Object providing ``get_redis_connection()`` and, optionally,
            a ``redis_config`` mapping from which the ``db`` index is read.

    Returns:
        A snapshot dict. ``status`` is ``"warning"`` when any client is blocked
        or memory usage is at least 80% of ``maxmemory``, ``"danger"`` when the
        probe fails, and ``"healthy"`` otherwise.
    """
    redis_settings = getattr(db_manager, "redis_config", {}) or {}
    snapshot = {
        "status": "healthy",
        "summary": "连接正常",
        "db_index": _safe_int(redis_settings.get("db", 0)),
        "key_count": 0,
        "connected_clients": 0,
        "blocked_clients": 0,
        "ops_per_sec": 0,
        "used_memory_human": "",
        "used_memory_peak_human": "",
        "memory_usage_percent": 0.0,
        "uptime_seconds": 0,
        "hit_rate_percent": 0.0,
    }

    try:
        client = db_manager.get_redis_connection()
        client.ping()
        stats = client.info() or {}
        snapshot["key_count"] = _safe_int(client.dbsize())

        # Integer counters copied straight from INFO: snapshot key -> INFO field.
        integer_fields = (
            ("connected_clients", "connected_clients"),
            ("blocked_clients", "blocked_clients"),
            ("ops_per_sec", "instantaneous_ops_per_sec"),
            ("uptime_seconds", "uptime_in_seconds"),
        )
        for snapshot_key, info_key in integer_fields:
            snapshot[snapshot_key] = _safe_int(stats.get(info_key))
        for text_key in ("used_memory_human", "used_memory_peak_human"):
            snapshot[text_key] = str(stats.get(text_key) or "").strip()

        # Memory usage is only meaningful when a maxmemory limit is configured.
        memory_limit = _safe_int(stats.get("maxmemory"))
        memory_used = _safe_int(stats.get("used_memory"))
        if memory_limit > 0:
            snapshot["memory_usage_percent"] = round((memory_used / memory_limit) * 100, 1)

        hits = _safe_int(stats.get("keyspace_hits"))
        misses = _safe_int(stats.get("keyspace_misses"))
        lookups = hits + misses
        if lookups > 0:
            snapshot["hit_rate_percent"] = round(
                (hits / lookups) * 100,
                1,
            )

        under_pressure = snapshot["blocked_clients"] > 0 or snapshot["memory_usage_percent"] >= 80
        if under_pressure:
            snapshot["status"] = "warning"
            snapshot["summary"] = (
                f"缓存压力需关注:keys {snapshot['key_count']},"
                f"clients {snapshot['connected_clients']},ops/s {snapshot['ops_per_sec']}"
            )
        else:
            snapshot["summary"] = (
                f"缓存正常:keys {snapshot['key_count']},"
                f"clients {snapshot['connected_clients']},ops/s {snapshot['ops_per_sec']}"
            )
        return snapshot
    except Exception as redis_error:
        snapshot["status"] = "danger"
        snapshot["summary"] = f"Redis 探测失败: {redis_error}"
        return snapshot
|
||
|
||
|
||
def _parse_snapshot_datetime(value: str | None) -> datetime | None:
|
||
"""把首页摘要里常用的时间字符串安全转换为 datetime。"""
|
||
text = str(value or "").strip()
|
||
if not text:
|
||
return None
|
||
try:
|
||
return datetime.strptime(text, "%Y-%m-%d %H:%M:%S")
|
||
except ValueError:
|
||
return None
|
||
|
||
|
||
def _count_enabled_runtime_items(items) -> int:
    """Count the enabled entries in a dict or list of configuration dicts.

    For a dict the values are inspected; for a list the elements are. Entries
    that are not dicts are skipped, and an entry without an ``enabled`` key
    counts as enabled. Any other container type yields 0.
    """
    if isinstance(items, dict):
        candidates = items.values()
    elif isinstance(items, list):
        candidates = items
    else:
        candidates = ()
    return sum(
        1
        for entry in candidates
        if isinstance(entry, dict) and bool(entry.get("enabled", True))
    )
|
||
|
||
|
||
def _extract_llm_catalog_summary() -> dict:
    """Summarize the LLM routing configuration for the dashboard.

    Uses the catalog from ``LLMRegistry.get_catalog()`` when it is non-empty,
    otherwise falls back to the legacy config from
    ``LLMRegistry.get_llm_config()``. Any failure is logged and an all-empty
    summary is returned.

    Returns:
        A dict with ``provider_count``, ``scene_count``, ``target_count``,
        ``default_scene``, ``default_backend`` and ``has_routing``.
    """
    try:
        catalog = LLMRegistry.get_catalog() or {}
        if not catalog:
            # Legacy layout: plain scenes/backends mappings, no providers.
            legacy_llm = LLMRegistry.get_llm_config() or {}
            legacy_scenes = legacy_llm.get("scenes", {}) or {}
            legacy_backends = legacy_llm.get("backends", {}) or {}
            legacy_default = str(legacy_llm.get("default_backend") or "").strip()
            return {
                "provider_count": 0,
                "scene_count": len(legacy_scenes) if isinstance(legacy_scenes, dict) else 0,
                "target_count": len(legacy_backends) if isinstance(legacy_backends, dict) else 0,
                "default_scene": "",
                "default_backend": legacy_default,
                "has_routing": bool(legacy_scenes) or bool(legacy_default),
            }

        provider_items = catalog.get("providers", {}) or {}
        dify_items = catalog.get("dify_apps", {}) or {}
        backend_items = catalog.get("backends", {}) or {}
        scene_items = catalog.get("scenes", {}) or {}
        default_scene = str(catalog.get("default_scene") or "").strip()
        default_backend = ""
        if default_scene:
            default_backend = str(LLMRegistry.get_scene_backend_name(default_scene) or "").strip()

        enabled_scene_count = _count_enabled_runtime_items(scene_items)
        # Targets are both plain backends and Dify apps.
        enabled_target_count = (
            _count_enabled_runtime_items(backend_items)
            + _count_enabled_runtime_items(dify_items)
        )
        return {
            "provider_count": _count_enabled_runtime_items(provider_items),
            "scene_count": enabled_scene_count,
            "target_count": enabled_target_count,
            "default_scene": default_scene,
            "default_backend": default_backend,
            "has_routing": enabled_scene_count > 0,
        }
    except Exception as llm_catalog_error:
        logger.warning(f"提取 LLM 路由摘要失败: {llm_catalog_error}")
        return {
            "provider_count": 0,
            "scene_count": 0,
            "target_count": 0,
            "default_scene": "",
            "default_backend": "",
            "has_routing": False,
        }
|
||
|
||
|
||
def _extract_ai_runtime_snapshot() -> dict:
    """Build the LLM runtime summary for the dashboard.

    Combines ``UnifiedLLMClient.get_runtime_snapshot()`` with the routing
    summary from ``_extract_llm_catalog_summary()`` and derives ``status`` and
    ``summary``:

    * no routing configured, or no recorded calls -> ``"warning"``;
    * every recorded call failed -> ``"danger"``;
    * some failures or a recorded last error -> ``"warning"``;
    * otherwise -> ``"healthy"``.
    """
    runtime_snapshot = UnifiedLLMClient.get_runtime_snapshot() or {}
    last_call = dict(runtime_snapshot.get("last_call") or {})
    catalog_summary = _extract_llm_catalog_summary()

    total_calls = _safe_int(runtime_snapshot.get("total_calls"))
    failed_calls = _safe_int(runtime_snapshot.get("failed_calls"))
    success_rate = _safe_float(runtime_snapshot.get("success_rate"))
    avg_latency_ms = _safe_float(runtime_snapshot.get("avg_latency_ms"))
    last_error = str(runtime_snapshot.get("last_error") or "").strip()

    def _last_call_text(field: str) -> str:
        """Return a stripped text field of the most recent call."""
        return str(last_call.get(field) or "").strip()

    # Raw runtime fields come first; the derived keys below override duplicates.
    snapshot = {
        **runtime_snapshot,
        "last_call": last_call,
        "provider_count": catalog_summary.get("provider_count", 0),
        "scene_count": catalog_summary.get("scene_count", 0),
        "target_count": catalog_summary.get("target_count", 0),
        "default_scene": catalog_summary.get("default_scene", ""),
        "default_backend": catalog_summary.get("default_backend", ""),
        "has_routing": bool(catalog_summary.get("has_routing")),
        "last_provider": _last_call_text("provider"),
        "last_backend": _last_call_text("backend"),
        "last_scene": _last_call_text("scene"),
        "last_model": _last_call_text("model"),
        "last_timestamp": _last_call_text("timestamp"),
        "last_latency_ms": _safe_float(last_call.get("latency_ms")),
        "last_error": last_error,
    }

    if not snapshot["has_routing"]:
        status = "warning"
        summary = "当前未发现完整的 LLM 路由配置,建议先检查默认场景与后端绑定"
    elif total_calls <= 0:
        status = "warning"
        summary = (
            f"已配置 {snapshot['scene_count']} 个场景、{snapshot['target_count']} 个目标,"
            "最近窗口内暂无统一 LLM 调用记录"
        )
    elif total_calls > 0 and failed_calls >= total_calls:
        status = "danger"
        summary = (
            f"最近 {total_calls} 次调用全部失败,成功率 {success_rate:.2f}%,"
            f"平均耗时 {avg_latency_ms:.2f}ms"
        )
    elif failed_calls > 0 or last_error:
        status = "warning"
        summary = (
            f"最近 {total_calls} 次调用中失败 {failed_calls} 次,成功率 {success_rate:.2f}%,"
            f"平均耗时 {avg_latency_ms:.2f}ms"
        )
    else:
        status = "healthy"
        summary = (
            f"最近 {total_calls} 次调用全部成功,成功率 {success_rate:.2f}%,"
            f"平均耗时 {avg_latency_ms:.2f}ms"
        )

    snapshot["status"] = status
    snapshot["summary"] = summary
    return snapshot
|
||
|
||
|
||
def _extract_scheduler_runtime_snapshot() -> dict:
    """Aggregate the ``async_job`` runtime rows into a scheduler summary for the dashboard.

    Returns:
        A dict with job counters (total / enabled / running / failed / invalid /
        paused / never run / system / plugin), the earliest upcoming run time,
        the most recently failed job and its (truncated) error, plus ``status``
        and ``summary``. ``status`` is ``"danger"`` when any job has an invalid
        schedule, ``"warning"`` when there are no jobs, failed jobs or no
        enabled jobs, and ``"healthy"`` otherwise.
    """
    runtime_rows = async_job.get_jobs_snapshot()

    def _status_text(job) -> str:
        """Return the normalized ``last_status`` of a job row."""
        return str(job.get("last_status") or "").strip().lower()

    def _failure_moment(job):
        """Return the time used to rank failed jobs: updated_at, else last_run_at, else datetime.min."""
        return (
            _parse_snapshot_datetime(job.get("updated_at"))
            or _parse_snapshot_datetime(job.get("last_run_at"))
            or datetime.min
        )

    upcoming_runs = []
    failed_rows = []
    system_job_count = 0
    plugin_job_count = 0
    for job in runtime_rows:
        key_text = str(job.get("job_key") or "").strip()
        owner_text = str(job.get("owner_name") or "system").strip().lower()
        # A job counts as a system job only when it is owned by "system" and
        # its key does not carry the plugin schedule prefix.
        if owner_text == "system" and not key_text.startswith("plugin_schedule:"):
            system_job_count += 1
        else:
            plugin_job_count += 1

        scheduled_at = _parse_snapshot_datetime(job.get("next_run_at"))
        if scheduled_at and bool(job.get("enabled")):
            upcoming_runs.append(scheduled_at)
        if _status_text(job) in {"failed", "invalid_schedule"}:
            failed_rows.append(job)

    # The first job with the latest failure time wins.
    latest_failed_row = max(failed_rows, key=_failure_moment) if failed_rows else {}

    statuses = [_status_text(job) for job in runtime_rows]
    invalid_jobs = statuses.count("invalid_schedule")
    never_run_jobs = statuses.count("never")
    total_jobs = len(runtime_rows)
    enabled_jobs = sum(1 for job in runtime_rows if bool(job.get("enabled")))
    running_jobs = sum(1 for job in runtime_rows if bool(job.get("running")))
    failed_jobs = len(failed_rows)
    paused_jobs = total_jobs - enabled_jobs
    next_run_at_text = min(upcoming_runs).strftime("%Y-%m-%d %H:%M:%S") if upcoming_runs else ""
    latest_failed_error = str(latest_failed_row.get("last_error") or "").strip()
    if len(latest_failed_error) > 120:
        # Keep the card compact: at most 120 characters including the ellipsis.
        latest_failed_error = f"{latest_failed_error[:117]}..."

    snapshot = {
        "status": "healthy",
        "summary": "任务调度运行正常",
        "total_jobs": total_jobs,
        "enabled_jobs": enabled_jobs,
        "running_jobs": running_jobs,
        "failed_jobs": failed_jobs,
        "invalid_jobs": invalid_jobs,
        "paused_jobs": paused_jobs,
        "never_run_jobs": never_run_jobs,
        "system_job_count": system_job_count,
        "plugin_job_count": plugin_job_count,
        "next_run_at": next_run_at_text,
        "latest_failed_job_name": str(latest_failed_row.get("name") or "").strip(),
        "latest_failed_error": latest_failed_error,
    }

    # Checks are ordered by severity; the first matching rule decides the outcome.
    if total_jobs <= 0:
        status = "warning"
        summary = "当前没有加载任何定时任务"
    elif invalid_jobs > 0:
        status = "danger"
        summary = f"发现 {invalid_jobs} 个任务调度配置非法,建议立即检查任务页"
    elif failed_jobs > 0:
        status = "warning"
        summary = (
            f"最近有 {failed_jobs} 个任务执行失败,"
            f"下一次执行 {next_run_at_text or '暂未计算'}"
        )
    elif enabled_jobs <= 0:
        status = "warning"
        summary = "任务已加载,但当前没有启用中的调度任务"
    elif running_jobs > 0:
        status = "healthy"
        summary = (
            f"当前有 {running_jobs} 个任务执行中,"
            f"下一次执行 {next_run_at_text or '暂未计算'}"
        )
    else:
        status = "healthy"
        summary = f"已启用 {enabled_jobs} 个任务,下一次执行 {next_run_at_text or '暂未计算'}"

    snapshot["status"] = status
    snapshot["summary"] = summary
    return snapshot
|
||
|
||
|
||
def _legacy_llm_to_catalog(legacy_llm: dict) -> dict:
    """Convert the legacy ``llm`` config (backends/scenes) into the catalog layout.

    Notes:
        1. Nothing is persisted; the result is only a display fallback.
        2. Backends whose provider is "dify" become ``dify_apps`` that share one
           provider template named ``dify_workflow_default``, built from the first
           dify backend found; all other backends are kept as ``backends``.
        3. Scenes come from the legacy ``scenes`` mapping; if that mapping is
           missing or empty but ``default_backend`` is set, a single
           ``main.default`` scene is generated.

    Returns:
        A dict with ``default_scene`` (name of the first scene, or "") and the
        lists ``providers``, ``dify_apps``, ``backends`` and ``scenes``.
    """
    source = legacy_llm or {}
    old_backends = source.get("backends", {}) or {}
    old_scenes = source.get("scenes", {}) or {}
    default_backend = str(source.get("default_backend") or "").strip()

    def _provider_of(cfg) -> str:
        """Return the normalized provider name of a backend config."""
        return str(cfg.get("provider") or "").strip().lower()

    def _scene_entry(name: str, backend_key: str) -> dict:
        """Build one scene entry that targets the given legacy backend."""
        backend_cfg = old_backends.get(backend_key, {}) or {}
        return {
            "name": name,
            "target_type": "dify_app" if _provider_of(backend_cfg) == "dify" else "backend",
            "target_ref": backend_key,
            "enabled": True,
        }

    # The first dify backend serves as the shared provider template.
    dify_template_cfg = next(
        (
            dict(candidate)
            for candidate in old_backends.values()
            if isinstance(candidate, dict) and _provider_of(candidate) == "dify"
        ),
        None,
    )
    providers = []
    if dify_template_cfg:
        providers.append(
            {
                "name": "dify_workflow_default",
                "provider_type": "dify",
                "enabled": True,
                "config": {
                    "provider": "dify",
                    "api_base_url": dify_template_cfg.get("api_base_url", ""),
                    "endpoint": dify_template_cfg.get("endpoint", "workflows/run"),
                    "mode": dify_template_cfg.get("mode", "workflow"),
                    "response_mode": dify_template_cfg.get("response_mode", "blocking"),
                    "request_timeout": dify_template_cfg.get("request_timeout", 60),
                    "max_retries": dify_template_cfg.get("max_retries", 3),
                    "retry_delay_seconds": dify_template_cfg.get("retry_delay_seconds", 1.0),
                },
            }
        )

    dify_apps = []
    backends = []
    for backend_name, backend_cfg in old_backends.items():
        if not isinstance(backend_cfg, dict):
            continue
        if _provider_of(backend_cfg) != "dify":
            backends.append(
                {
                    "name": str(backend_name),
                    "enabled": True,
                    "config": dict(backend_cfg),
                }
            )
            continue
        dify_apps.append(
            {
                "name": str(backend_name),
                "provider_template": "dify_workflow_default",
                "app_key": str(backend_cfg.get("api_key") or "").strip(),
                "workflow_output_key": str(backend_cfg.get("workflow_output_key") or "text").strip(),
                "enabled": True,
                "config": {
                    "endpoint": backend_cfg.get("endpoint", ""),
                    "mode": backend_cfg.get("mode", ""),
                    "response_mode": backend_cfg.get("response_mode", ""),
                    "request_timeout": backend_cfg.get("request_timeout", ""),
                },
            }
        )

    scenes = []
    if isinstance(old_scenes, dict) and old_scenes:
        for raw_scene, raw_backend in old_scenes.items():
            scene_name = str(raw_scene or "").strip()
            backend_name = str(raw_backend or "").strip()
            # Skip mappings with a blank scene name or blank backend reference.
            if scene_name and backend_name:
                scenes.append(_scene_entry(scene_name, backend_name))
    elif default_backend:
        scenes.append(_scene_entry("main.default", default_backend))

    return {
        "default_scene": scenes[0]["name"] if scenes else "",
        "providers": providers,
        "dify_apps": dify_apps,
        "backends": backends,
        "scenes": scenes,
    }
|
||
|
||
|
||
def _load_llm_catalog_runtime() -> dict:
    """Load the runtime LLM catalog, preferring the database-backed catalog.

    The catalog from ``current_app.dashboard_server.llm_catalog_db`` is used
    when it is available and contains scenes. Otherwise (or when reading it
    fails, which is logged) the legacy ``llm`` section of the YAML config is
    converted into the catalog layout for display.
    """
    try:
        catalog_store = getattr(current_app.dashboard_server, "llm_catalog_db", None)
        if catalog_store:
            stored_catalog = catalog_store.get_catalog() or {}
            if stored_catalog and stored_catalog.get("scenes"):
                return stored_catalog
    except Exception as e:
        logger.warning(f"从 MySQL 读取 LLM 目录失败,回退 YAML: {e}")

    # Fallback: present the legacy YAML llm section in catalog form.
    yaml_llm = _load_system_yaml().get("llm", {}) or {}
    if not isinstance(yaml_llm, dict):
        yaml_llm = {}
    return _legacy_llm_to_catalog(yaml_llm)
|
||
|
||
|
||
def _save_llm_catalog_runtime(catalog: dict) -> None:
    """Persist the runtime LLM catalog through ``dashboard_server.llm_catalog_db``.

    Raises:
        RuntimeError: If ``llm_catalog_db`` is not available on the server, or
            if ``save_catalog`` returns a falsy result.
    """
    catalog_store = getattr(current_app.dashboard_server, "llm_catalog_db", None)
    if not catalog_store:
        raise RuntimeError("llm_catalog_db 未初始化,无法保存 LLM 目录到 MySQL")
    if not catalog_store.save_catalog(catalog or {}):
        raise RuntimeError("保存 LLM 目录到 MySQL 失败")
|
||
|
||
|
||
def _plugins_root_path() -> str:
    """Return the absolute path of the ``plugins`` directory three levels above this module."""
    module_dir = os.path.dirname(__file__)
    relative_path = os.path.join(module_dir, '..', '..', '..', 'plugins')
    return os.path.abspath(relative_path)
|
||
|
||
|
||
def _scan_plugin_llm_usage() -> list:
    """Scan each plugin's ``config.toml`` for references to LLM scenes.

    Notes:
        1. The scan is read-only; plugin configuration is never rewritten.
        2. Only ``scene`` keys are collected: in a top-level section, in a dict
           nested one level inside a section, or directly at the document root.
        3. Plugins whose config cannot be parsed are logged and skipped.

    Returns:
        De-duplicated rows ``{"plugin", "section", "scene"}`` sorted by plugin
        and section; an empty list when the plugins directory does not exist.
    """
    plugins_root = _plugins_root_path()
    if not os.path.isdir(plugins_root):
        return []

    usages = []

    def _collect_refs(plugin_name: str, section_name: str, payload: dict) -> None:
        """Record the scene referenced by one config node, if it has one."""
        if isinstance(payload, dict):
            scene_name = str(payload.get("scene") or "").strip()
            if scene_name:
                usages.append({
                    "plugin": plugin_name,
                    "section": section_name,
                    "scene": scene_name,
                })

    for item in sorted(os.listdir(plugins_root)):
        plugin_dir = os.path.join(plugins_root, item)
        config_path = os.path.join(plugin_dir, "config.toml")
        if not (os.path.isdir(plugin_dir) and os.path.exists(config_path)):
            continue
        try:
            config_obj = toml.load(config_path) or {}
        except Exception as e:
            logger.warning(f"扫描插件 LLM 依赖失败: plugin={item}, path={config_path}, error={e}")
            continue

        for section_name, section_value in config_obj.items():
            if not isinstance(section_value, dict):
                continue
            # The section itself, e.g. [Dify] / [api].
            _collect_refs(item, str(section_name), section_value)
            # One nested level, e.g. [Douyu.report_api].
            for nested_name, nested_value in section_value.items():
                if isinstance(nested_value, dict):
                    _collect_refs(item, f"{section_name}.{nested_name}", nested_value)
        # Root-level fallback for a scene written directly at the top of the file.
        _collect_refs(item, "__root__", config_obj if isinstance(config_obj, dict) else {})

    # De-duplicate on plugin + section + scene; a later duplicate replaces an earlier one.
    unique = {
        f"{row.get('plugin')}::{row.get('section')}::{row.get('scene')}": row
        for row in usages
    }
    return sorted(unique.values(), key=lambda x: (x.get("plugin", ""), x.get("section", "")))
|
||
|
||
|
||
def _build_llm_topology() -> dict:
    """Build the LLM topology view used by the admin page.

    Loads the runtime LLM catalog, indexes providers / Dify apps / backends /
    scenes by their stripped ``name`` and resolves every plugin scene
    reference returned by ``_scan_plugin_llm_usage()`` into one row
    (plugin -> scene -> target -> provider).

    Returns:
        A dict holding the catalog lists, the raw plugin usages and the
        resolved ``topology_rows``.
    """

    def _text(value) -> str:
        # Normalise any value (None included) to a stripped string.
        return str(value or "").strip()

    catalog = _load_llm_catalog_runtime()

    def _index_by_name(section: str) -> dict:
        # Entries sharing a name collapse to the last one listed.
        return {_text(entry.get("name")): entry for entry in (catalog.get(section, []) or [])}

    providers = _index_by_name("providers")
    dify_apps = _index_by_name("dify_apps")
    backends = _index_by_name("backends")
    scenes = _index_by_name("scenes")
    default_scene = _text(catalog.get("default_scene"))

    plugin_usages = _scan_plugin_llm_usage()
    topology_rows = []
    for usage in plugin_usages:
        scene_name = _text(usage.get("scene"))
        scene_entry = scenes.get(scene_name, {}) or {}
        target_type = _text(scene_entry.get("target_type")).lower()
        target_ref = _text(scene_entry.get("target_ref"))

        provider_label = ""
        target_ok = False
        if target_type == "backend":
            backend_entry = backends.get(target_ref, {}) or {}
            if isinstance(backend_entry, dict):
                backend_cfg = backend_entry.get("config") or {}
            else:
                backend_cfg = {}
            provider_label = _text((backend_cfg or {}).get("provider"))
            target_ok = bool(backend_entry)
        elif target_type == "dify_app":
            app_entry = dify_apps.get(target_ref, {}) or {}
            template_name = _text(app_entry.get("provider_template"))
            provider_entry = providers.get(template_name, {}) or {}
            provider_label = _text(provider_entry.get("provider_type"))
            # A Dify app target is only valid when both the app and its provider template exist.
            target_ok = bool(app_entry and provider_entry)

        row = {
            "plugin": usage.get("plugin", ""),
            "section": usage.get("section", ""),
            "scene": scene_name,
            "target_type": target_type or "-",
            "target_ref": target_ref or "-",
            "provider": provider_label or "-",
            "valid_scene": bool(scene_name in scenes),
            "valid_target": target_ok,
        }
        topology_rows.append(row)

    result = {"default_scene": default_scene}
    for section in ("providers", "dify_apps", "backends", "scenes"):
        result[section] = catalog.get(section, []) or []
    result["plugin_usages"] = plugin_usages
    result["topology_rows"] = topology_rows
    return result
|
||
|
||
|
||
@system_bp.route('/api_docs')
@login_required
def api_docs():
    """Render the API docs page for a given or configured source URL.

    The ``src`` query parameter, when present and acceptable, is handed to the
    template as ``src_url`` (presumably used as an embed / iframe source --
    confirm against ``api_docs.html``). Otherwise the robot's
    ``ipad_config["server_url"]`` is used, falling back to the local default.

    Only relative URLs and ``http`` / ``https`` URLs are accepted from the
    query string, so a crafted link cannot inject a ``javascript:`` or
    ``data:`` URL into the page.
    """
    default_src = "http://127.0.0.1:8059/"

    def _is_acceptable(url: str) -> bool:
        # Browsers ignore whitespace / control characters when resolving a
        # URL scheme, so drop them before inspecting the scheme.
        compact = "".join(ch for ch in url if ch > " ")
        head, colon, _ = compact.partition(":")
        if not colon or any(mark in head for mark in "/?#"):
            # No scheme present: a relative reference.
            return True
        return head.lower() in {"http", "https"}

    src = request.args.get('src')
    if src and not _is_acceptable(src):
        logger.warning(f"api_docs: rejected unsupported src URL: {src!r}")
        src = None

    if not src:
        try:
            server = current_app.dashboard_server
            cfg = getattr(server.robot, "ipad_config", {}) or {}
            # ``or`` also covers a key that exists but is empty / None.
            src = cfg.get("server_url") or default_src
        except Exception:
            src = default_src
    return render_template('api_docs.html', src_url=src)
|
||
|
||
|
||
@system_bp.route('/system_status')
@login_required
def system_status():
    """Render the built-in lightweight resource monitoring page."""
    # The resource monitor is a panel built into the project:
    #   1. it no longer depends on a standalone glances process;
    #   2. the page only consumes this service's own API;
    #   3. production deployments need no extra port such as 61208.
    template_name = 'system_status.html'
    return render_template(template_name)
|
||
|
||
|
||
@system_bp.route('/system_llm')
@login_required
def system_llm():
    """Render the global LLM configuration page."""
    template_name = 'system_llm.html'
    return render_template(template_name)
|
||
|
||
|
||
# 页面路由
|
||
@system_bp.route('/wx_logs')
@login_required
def wx_logs():
    """Render the WeChat log viewer page."""
    template_name = 'wx_logs.html'
    return render_template(template_name)
|
||
|
||
|
||
# API路由
|
||
@system_bp.route('/api/system_info')
@login_required
def api_system_info():
    """Return a JSON snapshot of basic host and process metrics.

    On success responds with ``{"success": True, "data": {...}}``; any
    exception is logged and reported as ``{"success": False, "error": ...}``
    with HTTP 500.
    """
    try:
        info = {}
        info["os"] = platform.system()
        info["os_version"] = platform.version()
        info["python_version"] = platform.python_version()
        info["cpu_usage"] = psutil.cpu_percent()
        info["memory_usage"] = psutil.virtual_memory().percent
        info["disk_usage"] = psutil.disk_usage('/').percent
        # Uptime is measured from application start, not from host boot.
        info["uptime"] = time.time() - APP_START_TIME
        info["timestamp"] = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        own_process = psutil.Process(os.getpid())
        info["open_files"] = len(own_process.open_files())
    except Exception as e:
        logger.error(f"获取系统信息失败: {e}")
        return jsonify({"success": False, "error": str(e)}), 500
    else:
        return jsonify({"success": True, "data": info})
|
||
|
||
|
||
@system_bp.route('/api/system_status_overview')
@login_required
def api_system_status_overview():
    """Return the lightweight server snapshot consumed by the resource monitor page."""
    try:
        snapshot = _extract_server_runtime_snapshot()
        body = {"success": True, "data": snapshot}
        return jsonify(body)
    except Exception as e:
        logger.error(f"获取资源监控快照失败: {e}")
        failure = {"success": False, "error": str(e)}
        return jsonify(failure), 500
|
||
|
||
|
||
@system_bp.route('/api/system_health_summary')
@login_required
def api_system_health_summary():
    """Aggregate the key health signals shown on the dashboard home page.

    Combines robot state, plugin status counters, the recent error count,
    MySQL / Redis snapshots, the AI runtime snapshot and the scheduler
    snapshot into one JSON payload. Each section carries a ``status`` and a
    ready-made ``summary`` text so the frontend does not re-implement the
    rules. Any exception is logged and returned as HTTP 500.
    """
    try:
        server = current_app.dashboard_server
        robot = getattr(server, "robot", None)
        plugin_manager = getattr(server, "plugin_manager", None)
        plugin_map = getattr(plugin_manager, "plugins", {}) or {}

        # Count plugins per state so the home page can tell how many are
        # loaded, how many actually run, and whether any are in error.
        counters = {
            "total": len(plugin_map),
            "running": 0,
            "loaded": 0,
            "stopped": 0,
            "error": 0,
            "unloaded": 0,
            "unknown": 0,
        }
        for plugin in plugin_map.values():
            state = getattr(plugin, "status", None)
            if state == PluginStatus.RUNNING:
                bucket = "running"
            elif state == PluginStatus.LOADED:
                bucket = "loaded"
            elif state == PluginStatus.STOPPED:
                bucket = "stopped"
            elif state == PluginStatus.ERROR:
                bucket = "error"
            elif state == PluginStatus.UNLOADED:
                bucket = "unloaded"
            else:
                bucket = "unknown"
            counters[bucket] += 1

        # Reuse the existing stats DB query instead of writing dedicated SQL
        # just for this card.
        _, recent_error_count = server.stats_db.get_error_logs(days=1, page=1, limit=1)

        # Infrastructure probes are summary-level only; a failing probe is
        # reported on the board rather than failing the whole endpoint.
        mysql_snapshot = _extract_mysql_runtime_snapshot(server.db_manager)
        redis_snapshot = _extract_redis_runtime_snapshot(server.db_manager)

        # Robot section.
        robot_running = bool(getattr(robot, "ipad_running", False))
        robot_nickname = str(getattr(robot, "nickname", "") or "").strip()
        robot_wxid = str(getattr(robot, "wxid", "") or "").strip()
        if robot_running:
            robot_summary = "已连接并正在处理消息"
        else:
            robot_summary = "未连接或主循环未运行"
        robot_label = robot_nickname or robot_wxid
        if robot_label:
            robot_summary = f"{robot_summary} · {robot_label}"

        # Plugin section.
        error_plugins = counters["error"]
        running_plugins = counters["running"]
        total_plugins = counters["total"]
        if error_plugins > 0:
            plugin_status = "warning"
            plugin_summary = f"异常 {error_plugins} 个,运行中 {running_plugins} / {total_plugins}"
        elif running_plugins == 0 and total_plugins > 0:
            plugin_status = "warning"
            plugin_summary = f"暂无运行中插件,共加载 {total_plugins} 个"
        else:
            plugin_status = "healthy"
            plugin_summary = f"运行中 {running_plugins} / {total_plugins}"

        # Error section.
        has_recent_errors = recent_error_count > 0
        error_status = "warning" if has_recent_errors else "healthy"
        if has_recent_errors:
            error_summary = f"近 24 小时记录到 {recent_error_count} 条异常"
        else:
            error_summary = "近 24 小时未记录到异常"

        # AI card: passive runtime and routing summary, no active probing.
        ai_runtime = _extract_ai_runtime_snapshot()

        # Generic task-scheduler summary shown on the right-hand side.
        scheduler_runtime = _extract_scheduler_runtime_snapshot()

        # Infrastructure section: the worst of the two states wins.
        mysql_state = mysql_snapshot.get("status")
        redis_state = redis_snapshot.get("status")
        infra_states = {mysql_state, redis_state}
        if "danger" in infra_states:
            infra_status = "danger"
        elif "warning" in infra_states:
            infra_status = "warning"
        else:
            infra_status = "healthy"

        if mysql_state == "healthy" and redis_state == "healthy":
            infra_summary = "MySQL / Redis 均正常"
        elif mysql_state != "danger" and redis_state != "danger":
            infra_summary = "基础设施连接正常,但部分负载指标需要关注"
        else:
            infra_summary = "存在基础设施连接异常"

        data = {
            "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
            "robot": {
                "status": "healthy" if robot_running else "danger",
                "running": robot_running,
                "nickname": robot_nickname,
                "wxid": robot_wxid,
                "summary": robot_summary,
            },
            "plugins": {
                "status": plugin_status,
                "summary": plugin_summary,
                **counters,
            },
            "errors": {
                "status": error_status,
                "recent_24h_count": recent_error_count,
                "summary": error_summary,
            },
            "infrastructure": {
                "status": infra_status,
                "summary": infra_summary,
                "mysql": mysql_snapshot,
                "redis": redis_snapshot,
            },
            "ai_runtime": {**ai_runtime},
            "scheduler": {**scheduler_runtime},
        }
        return jsonify({"success": True, "data": data})
    except Exception as e:
        logger.error(f"获取系统健康摘要失败: {e}")
        return jsonify({"success": False, "error": str(e)}), 500
|
||
|
||
|
||
def _read_log_tail(path: str, lines: int, chunk_size: int = 8192) -> list:
    """Return the last ``lines`` lines of ``path`` as decoded strings.

    The file is read backwards in ``chunk_size`` byte blocks until more than
    ``lines`` newline bytes have been collected (or the start of the file is
    reached), so large logs are not loaded in full. Lines are returned
    without their line terminators; undecodable bytes are dropped.

    Args:
        path: Log file to read.
        lines: Number of trailing lines wanted; non-positive yields ``[]``.
        chunk_size: Size of each backward read in bytes.
    """
    if lines <= 0:
        return []

    chunks = []
    newline_count = 0
    with open(path, 'rb') as f:
        f.seek(0, os.SEEK_END)
        pos = f.tell()
        # "<=" (not "<") so the oldest requested line is complete, not cut
        # at a chunk boundary.
        while pos > 0 and newline_count <= lines:
            read_size = min(chunk_size, pos)
            pos -= read_size
            f.seek(pos)
            chunk = f.read(read_size)
            chunks.append(chunk)
            newline_count += chunk.count(b'\n')

    # Chunks were collected newest-first; join once instead of re-prefixing a
    # growing buffer on every read.
    buffer = b"".join(reversed(chunks))
    return [raw.decode('utf-8', errors='ignore') for raw in buffer.splitlines()[-lines:]]


@system_bp.route('/api/wx_logs')
@login_required
def api_wx_logs():
    """Return the tail of one of the WeChat log files as JSON.

    Query parameters:
        type: ``error`` / ``debug`` select ``wx_error.log`` / ``wx_debug.log``;
            anything else selects ``wx_info.log`` (default ``info``).
        lines: Number of trailing lines to return (default 100). Non-positive
            values return no content.

    The body is gzip-compressed when the client advertises gzip support.
    Any exception is logged and returned as HTTP 500.
    """
    try:
        log_type = request.args.get('type', 'info')
        lines = request.args.get('lines', 100, type=int)
        # A zero / negative count would turn the tail slice into "everything"
        # (or a slice from the front), so clamp it.
        lines = max(lines, 0)

        # Resolve the logs directory relative to this module.
        project_root = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..', '..', 'logs'))

        if log_type == 'error':
            log_file = os.path.join(project_root, 'wx_error.log')
        elif log_type == 'debug':
            log_file = os.path.join(project_root, 'wx_debug.log')
        else:
            log_file = os.path.join(project_root, 'wx_info.log')

        log_content = []
        if os.path.exists(log_file):
            try:
                log_content = _read_log_tail(log_file, lines)
            except Exception as e:
                logger.error(f"高效读取日志失败,回退到常规方式: {e}")
                with open(log_file, 'r', encoding='utf-8', errors='ignore') as f:
                    # Strip terminators so both paths return the same shape.
                    log_content = [line.rstrip('\r\n') for line in deque(f, lines)]
        else:
            logger.warning(f"日志文件不存在: {log_file}")
            # List the log files that do exist to help debugging.
            try:
                all_files = [f for f in os.listdir(project_root) if f.endswith('.log')]
                logger.info(f"项目根目录下的日志文件: {all_files}")
            except Exception as e:
                logger.error(f"列出目录文件失败: {e}")

        payload = {
            "success": True,
            "data": {
                "log_type": log_type,
                "log_file": log_file,
                "content": log_content,
                "lines": len(log_content)
            }
        }
        accept = request.headers.get('Accept-Encoding', '')
        if 'gzip' in accept.lower():
            body = json.dumps(payload, ensure_ascii=False).encode('utf-8')
            gz = gzip.compress(body, compresslevel=6)
            resp = Response(gz, mimetype='application/json')
            resp.headers['Content-Encoding'] = 'gzip'
        else:
            resp = jsonify(payload)
        # The representation depends on Accept-Encoding; tell caches so.
        resp.headers['Vary'] = 'Accept-Encoding'
        return resp
    except Exception as e:
        logger.error(f"获取微信日志失败: {e}")
        return jsonify({"success": False, "error": str(e)}), 500
|
||
|
||
|
||
# 在现有路由下添加
|
||
@system_bp.route('/api/current_user_info', methods=['GET'])
@login_required
def get_current_user_info():
    """Return the currently logged-in WeChat user's info as JSON."""
    user_info = current_app.dashboard_server.get_current_user_info()
    return jsonify(user_info)
|
||
|
||
|
||
@system_bp.route('/api/system/config/raw', methods=['GET'])
@login_required
def get_system_config_raw():
    """Return the raw system config text plus the known LLM target names.

    ``llm_backends`` lists every non-empty backend name and every Dify app
    (prefixed with ``dify_app::``) from the runtime catalog, sorted, to help
    debug scene bindings. Any exception is logged and returned as HTTP 500.
    """
    try:
        config_path = _system_config_path()
        with open(config_path, 'r', encoding='utf-8') as handle:
            config_text = handle.read()

        catalog = _load_llm_catalog_runtime()
        targets = []
        for backend in (catalog.get("backends", []) or []):
            backend_name = str(backend.get("name") or "").strip()
            if backend_name:
                targets.append(backend_name)
        for app in (catalog.get("dify_apps", []) or []):
            # The prefix makes this entry non-empty even for a nameless app.
            targets.append(f"dify_app::{str(app.get('name') or '').strip()}")
        targets.sort()

        body = {
            "success": True,
            "data": config_text,
            "path": config_path,
            "llm_backends": targets,
        }
        return jsonify(body)
    except Exception as e:
        logger.error(f"读取系统配置失败: {e}")
        return jsonify({"success": False, "message": str(e)}), 500
|
||
|
||
|
||
@system_bp.route('/api/system/config/update', methods=['POST'])
@login_required
def update_system_config():
    """Validate and save the global YAML config, then apply it at runtime.

    Expects a JSON body ``{"config_text": "<yaml>"}``. The text must be a
    string that parses as YAML with a mapping at the root; otherwise the
    request is rejected with HTTP 400 and the config file is left untouched.
    After writing, the robot's config is reloaded and runtime objects are
    refreshed; without a usable robot instance only the LLM registry cache is
    invalidated. Unexpected errors are logged and returned as HTTP 500.
    """
    try:
        server = current_app.dashboard_server
        # silent=True: a malformed / non-JSON body becomes a 400 below rather
        # than an exception reported as a server error.
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            data = {}
        config_text = data.get("config_text")
        if config_text is None:
            return jsonify({"success": False, "message": "缺少配置内容"}), 400
        if not isinstance(config_text, str):
            return jsonify({"success": False, "message": "配置内容必须是字符串"}), 400

        try:
            parsed = yaml.safe_load(config_text)
        except yaml.YAMLError as e:
            return jsonify({"success": False, "message": f"YAML 格式错误: {e}"}), 400
        # An empty document or a bare scalar / list parses fine but would
        # wipe the whole configuration; refuse it before touching the file.
        if not isinstance(parsed, dict):
            return jsonify({"success": False, "message": "配置内容必须是 YAML 键值映射"}), 400

        config_path = _system_config_path()
        with open(config_path, 'w', encoding='utf-8') as f:
            f.write(config_text)

        if getattr(server, "robot", None) and getattr(server.robot, "config", None):
            server.robot.config.reload()
            # Sync runtime objects right after saving so a process restart is
            # not needed to pick up the new values.
            server.robot.apply_runtime_config(reload_catalog=True)
        else:
            # Without a robot instance, at least drop the LLM routing cache so
            # later requests do not read stale values.
            LLMRegistry.invalidate_cache()

        return jsonify({"success": True, "message": "全局配置已保存并应用到运行时"})
    except Exception as e:
        logger.error(f"保存系统配置失败: {e}")
        return jsonify({"success": False, "message": str(e)}), 500
|
||
|
||
|
||
@system_bp.route('/api/system/llm_config', methods=['GET'])
@login_required
def get_system_llm_config():
    """Return the global LLM catalog (sorted by name) plus the plugin topology.

    Any exception is logged and returned as HTTP 500.
    """

    def _name_key(entry) -> str:
        # Sort key: the entry's name, with a missing / None name as "".
        return str(entry.get("name") or "")

    try:
        catalog = _load_llm_catalog_runtime()
        sorted_sections = {}
        for section in ("providers", "dify_apps", "backends", "scenes"):
            sorted_sections[section] = sorted(catalog.get(section, []) or [], key=_name_key)
        topology = _build_llm_topology()

        data = {"default_scene": catalog.get("default_scene", "")}
        data.update(sorted_sections)
        data["topology_rows"] = topology.get("topology_rows", [])
        data["plugin_usages"] = topology.get("plugin_usages", [])
        # The new catalog model is stored primarily in MySQL.
        data["config_path"] = (
            "mysql:t_llm_provider_templates + t_llm_dify_apps + "
            "t_llm_backends + t_llm_scenes (fallback yaml)"
        )
        return jsonify({"success": True, "data": data})
    except Exception as e:
        logger.error(f"读取全局 LLM 配置失败: {e}")
        return jsonify({"success": False, "message": str(e)}), 500
|
||
|
||
|
||
def _llm_entry_names(entries) -> set:
    """Collect the non-empty, stripped ``name`` of every dict entry in ``entries``."""
    names = set()
    for entry in entries:
        if isinstance(entry, dict):
            name = str(entry.get("name") or "").strip()
            if name:
                names.add(name)
    return names


def _validate_llm_catalog(default_scene, provider_list, dify_app_list, backend_list, scene_list):
    """Check a submitted LLM catalog and return the first error message, or None.

    Checks, in order: every section is a list; each named Dify app binds an
    existing provider template and has an ``app_key``; scene names are unique
    and each named scene targets an existing ``dify_app`` / ``backend``; the
    default scene (when given) exists. Non-dict and nameless entries are
    skipped, not rejected.
    """
    sections = (
        ("providers", provider_list),
        ("dify_apps", dify_app_list),
        ("backends", backend_list),
        ("scenes", scene_list),
    )
    for label, entries in sections:
        if not isinstance(entries, list):
            return f"{label} 格式不正确"

    # Gather the name sets first so target references can be checked.
    provider_names = _llm_entry_names(provider_list)
    dify_app_names = _llm_entry_names(dify_app_list)
    backend_names = _llm_entry_names(backend_list)

    for app in dify_app_list:
        if not isinstance(app, dict):
            continue
        app_name = str(app.get("name") or "").strip()
        if not app_name:
            continue
        provider_template = str(app.get("provider_template") or "").strip()
        if not provider_template:
            return f"Dify应用 {app_name} 未绑定 Provider 模板"
        if provider_template not in provider_names:
            return f"Dify应用 {app_name} 绑定的 Provider 不存在"
        if not str(app.get("app_key") or "").strip():
            return f"Dify应用 {app_name} 缺少 app_key"

    scene_names = set()
    for scene in scene_list:
        if not isinstance(scene, dict):
            continue
        scene_name = str(scene.get("name") or "").strip()
        target_type = str(scene.get("target_type") or "").strip().lower()
        target_ref = str(scene.get("target_ref") or "").strip()
        if not scene_name:
            continue
        if scene_name in scene_names:
            return f"场景名重复: {scene_name}"
        scene_names.add(scene_name)
        if target_type not in {"dify_app", "backend"}:
            return f"场景 {scene_name} target_type 非法"
        if not target_ref:
            return f"场景 {scene_name} 未绑定目标"
        if target_type == "dify_app" and target_ref not in dify_app_names:
            return f"场景 {scene_name} 绑定的 dify_app 不存在"
        if target_type == "backend" and target_ref not in backend_names:
            return f"场景 {scene_name} 绑定的 backend 不存在"

    if default_scene and default_scene not in scene_names:
        return "默认场景不存在"
    return None


@system_bp.route('/api/system/llm_config', methods=['POST'])
@login_required
def update_system_llm_config():
    """Validate and save the global LLM catalog, then apply it at runtime.

    Expects a JSON object with ``default_scene``, ``providers``,
    ``dify_apps``, ``backends`` and ``scenes``. Validation failures return
    HTTP 400 with a message and nothing is saved. After saving, the robot's
    config is reloaded and runtime objects are refreshed; without a usable
    robot instance only the LLM registry cache is invalidated. Unexpected
    errors are logged and returned as HTTP 500.
    """
    try:
        server = current_app.dashboard_server
        data = request.get_json() or {}
        # A JSON array / scalar body has no ``.get``; reject it as a client
        # error rather than failing with an AttributeError.
        if not isinstance(data, dict):
            return jsonify({"success": False, "message": "请求体必须是 JSON 对象"}), 400

        default_scene = str(data.get("default_scene") or "").strip()
        provider_list = data.get("providers", []) or []
        dify_app_list = data.get("dify_apps", []) or []
        backend_list = data.get("backends", []) or []
        scene_list = data.get("scenes", []) or []

        error_message = _validate_llm_catalog(
            default_scene, provider_list, dify_app_list, backend_list, scene_list
        )
        if error_message:
            return jsonify({"success": False, "message": error_message}), 400

        catalog = {
            "default_scene": default_scene,
            "providers": provider_list,
            "dify_apps": dify_app_list,
            "backends": backend_list,
            "scenes": scene_list,
        }
        _save_llm_catalog_runtime(catalog)

        if getattr(server, "robot", None) and getattr(server.robot, "config", None):
            server.robot.config.reload()
            # Invalidate runtime caches after saving so the next plugin call
            # uses the new catalog.
            server.robot.apply_runtime_config(reload_catalog=True)
        else:
            LLMRegistry.invalidate_cache()

        return jsonify({"success": True, "message": "全局 LLM 配置已保存并应用到运行时"})
    except Exception as e:
        logger.error(f"保存全局 LLM 配置失败: {e}")
        return jsonify({"success": False, "message": str(e)}), 500
|
||
|
||
|
||
@system_bp.route('/api/system/md2img_health', methods=['GET'])
@login_required
def get_md2img_health():
    """Return the Markdown-to-image runtime health snapshot.

    By default the state is only read. Passing ``ensure_runtime`` as one of
    ``1`` / ``true`` / ``yes`` / ``on`` (case-insensitive) forwards
    ``ensure_runtime=True`` to the snapshot helper.
    """
    truthy_flags = {'1', 'true', 'yes', 'on'}
    try:
        raw_flag = str(request.args.get('ensure_runtime', 'false'))
        ensure_runtime = raw_flag.strip().lower() in truthy_flags
        snapshot = get_md2img_health_snapshot(ensure_runtime=ensure_runtime)
        return jsonify({"success": True, "data": snapshot})
    except Exception as e:
        logger.error(f"获取 md2img 健康状态失败: {e}")
        return jsonify({"success": False, "message": str(e)}), 500
|
||
|
||
|
||
@system_bp.route('/api/system/md2img_warmup', methods=['POST'])
@login_required
def trigger_md2img_warmup():
    """Manually trigger the Markdown-to-image browser warm-up.

    Reads an optional ``timeout_seconds`` from the JSON body (default 45,
    clamped to 10..180), runs the synchronous warm-up and returns the health
    snapshot taken afterwards. A failed warm-up is reported with HTTP 500.
    """
    try:
        body = request.get_json(silent=True) or {}
        requested = int(body.get('timeout_seconds', 45) or 45)
        # Clamp to the 10..180 second window.
        timeout_seconds = min(max(requested, 10), 180)

        warmed_up = warmup_md2img_browser_sync(timeout_seconds=timeout_seconds)
        snapshot = get_md2img_health_snapshot(ensure_runtime=False)

        if not warmed_up:
            failure = {
                "success": False,
                "message": f"预热失败(timeout={timeout_seconds}s),请查看运行日志",
                "data": snapshot,
            }
            return jsonify(failure), 500

        success = {
            "success": True,
            "message": f"预热完成(timeout={timeout_seconds}s)",
            "data": snapshot,
        }
        return jsonify(success)
    except Exception as e:
        logger.error(f"触发 md2img 预热失败: {e}")
        return jsonify({"success": False, "message": str(e)}), 500
|
||
|
||
|
||
@system_bp.route('/api/restart_service', methods=['POST'])
@login_required
def restart_service():
    """Launch ``restart.sh`` from the project root to restart the service.

    The script is started detached (own session, output discarded) and the
    request returns immediately. Responds with HTTP 404 when the script is
    missing and HTTP 500 on any other failure.
    """
    try:
        module_dir = os.path.dirname(__file__)
        project_root = os.path.abspath(os.path.join(module_dir, '..', '..', '..'))
        script_path = os.path.join(project_root, 'restart.sh')

        if os.path.exists(script_path):
            popen_options = {
                "cwd": project_root,
                "stdout": subprocess.DEVNULL,
                "stderr": subprocess.DEVNULL,
                # Own session, so the script is not tied to this process's session.
                "start_new_session": True,
            }
            subprocess.Popen(['bash', script_path], **popen_options)

            logger.warning(f"后台触发服务重启脚本: {script_path}")
            return jsonify({
                "success": True,
                "message": "已触发重启脚本,服务将在短时间内重启"
            })

        return jsonify({"success": False, "message": f"未找到脚本: {script_path}"}), 404
    except Exception as e:
        logger.error(f"触发服务重启失败: {e}")
        return jsonify({"success": False, "message": str(e)}), 500
|