移除Glances并改造内置资源监控页面

This commit is contained in:
liuwei
2026-05-06 10:32:58 +08:00
parent e414562378
commit 9f3f6ffbae
4 changed files with 619 additions and 236 deletions

View File

@@ -1,157 +0,0 @@
import time
import threading
import subprocess
import requests
from loguru import logger
class GlancesMonitor:
    """Poll a Glances web API and e-mail an alert when a metric crosses its threshold.

    The monitor can launch ``glances -w`` as a child process and polls the
    REST endpoints under ``/api/4`` from a daemon thread. Alerts for the same
    metric are rate limited to one e-mail per cooldown window.
    """

    # Minimum number of seconds between two alert e-mails for the same metric.
    ALERT_COOLDOWN_SECONDS = 3600
    # Disk and process metrics are only polled on every N-th loop iteration.
    SLOW_CHECK_EVERY = 6
    # Pause applied after a failed polling round before trying again.
    ERROR_BACKOFF_SECONDS = 60
    # Divisor used to convert a byte count into MB.
    BYTES_PER_MB = 1024 * 1024

    def __init__(self, email_sender, host='192.168.2.170', port=61208,
                 cpu_threshold=80.0, load_threshold=None, io_threshold=80.0,
                 disk_usage_threshold=80.0, handle_threshold=20000,
                 monitor_interval=30, recipient=None, request_timeout=10.0):
        """Initialise the Glances monitor.

        Args:
            email_sender: Initialised sender object exposing
                ``send_email(recipient, subject, body)``.
            host (str): Host where the Glances web server is reachable.
            port (int): Port of the Glances web server.
            cpu_threshold (float): CPU usage threshold in percent.
            load_threshold (float): 1-minute load threshold. Falls back to
                ``get_cpu_count() * 2`` when omitted (or falsy).
            io_threshold (float): Disk I/O threshold in MB.
            disk_usage_threshold (float): Filesystem usage threshold in percent.
            handle_threshold (int): Threshold compared against the ``total``
                field of the ``processcount`` endpoint.
            monitor_interval (int): Seconds to sleep between polling rounds.
            recipient (str): Address that receives alert e-mails.
            request_timeout (float): Timeout in seconds for every HTTP call to
                the Glances API, so a hung server cannot block the caller.
        """
        self.host = host
        self.port = port
        self.api_url = f"http://{self.host}:{self.port}/api/4"
        # Must be set before get_cpu_count() is used for the default below.
        self.request_timeout = request_timeout
        self.cpu_threshold = cpu_threshold
        self.load_threshold = load_threshold or (self.get_cpu_count() * 2)
        self.io_threshold = io_threshold
        self.disk_usage_threshold = disk_usage_threshold
        self.handle_threshold = handle_threshold
        self.email_sender = email_sender
        self.recipient = recipient
        self.glances_process = None
        self.last_alert_times = {}
        self._running = False
        self.monitor_interval = monitor_interval
        self._loop_index = 0

    def _get_json(self, path):
        """GET ``{api_url}/{path}`` and return the decoded JSON body.

        Raises:
            requests.RequestException: On connection errors, timeouts or a
                non-2xx status code.
        """
        response = requests.get(f"{self.api_url}/{path}", timeout=self.request_timeout)
        response.raise_for_status()
        return response.json()

    def get_cpu_count(self):
        """Return the ``count`` field of the ``cpu`` endpoint, or 1 on any failure."""
        try:
            return self._get_json("cpu").get('count', 1)
        except Exception as e:
            logger.error(e)
            return 1

    def start_glances(self):
        """Start the Glances web server as a child process.

        Raises:
            FileNotFoundError, subprocess.CalledProcessError: When the
                ``glances`` executable is missing or its version check fails.
            RuntimeError: When the child exits within two seconds of launch.
        """
        try:
            # Output is discarded rather than piped: nothing ever reads the
            # pipes, so a chatty child could otherwise block on a full buffer.
            subprocess.run(['glances', '--version'], check=True,
                           stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
            self.glances_process = subprocess.Popen(
                ['glances', '-w', '--port', str(self.port)],
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
            )
            # Give the server a moment to come up, then make sure it survived.
            time.sleep(2)
            if self.glances_process.poll() is not None:
                raise RuntimeError("Glances 启动失败")
            logger.info(f"Glances Web 服务已启动: http://{self.host}:{self.port}")
        except (FileNotFoundError, subprocess.CalledProcessError):
            # A missing executable surfaces as FileNotFoundError, not
            # CalledProcessError, so both map to the "not installed" hint.
            logger.error("错误: Glances 未安装。请运行: python3.11 -m pip install glances")
            raise
        except Exception as e:
            logger.error(f"启动 Glances 失败: {e}")
            raise

    def stop_glances(self):
        """Terminate the Glances child process, killing it if it does not exit."""
        if not self.glances_process:
            return
        self.glances_process.terminate()
        try:
            self.glances_process.wait(timeout=10)
        except subprocess.TimeoutExpired:
            self.glances_process.kill()
            self.glances_process.wait()
        self.glances_process = None
        # A normal shutdown is informational, not an error.
        logger.info("Glances Web 服务已停止")

    def send_alert_email(self, metric, value, threshold):
        """Send an alert e-mail for ``metric``, at most once per cooldown window.

        Nothing is sent when no sender or recipient is configured. The
        cooldown only starts when the sender reports success, so a failed
        delivery is retried on the next breach.
        """
        if not self.email_sender or not self.recipient:
            return
        current_time = time.time()
        last_alert_time = self.last_alert_times.get(metric, 0)
        if current_time - last_alert_time < self.ALERT_COOLDOWN_SECONDS:
            return
        subject = f"服务器告警: {metric} 过高"
        body = f"警告: {metric} 当前值为 {value},超过阈值 {threshold}\n时间: {time.strftime('%Y-%m-%d %H:%M:%S', time.localtime())}"
        if self.email_sender.send_email(self.recipient, subject, body):
            self.last_alert_times[metric] = current_time

    @staticmethod
    def _max_disk_io_mb(disks):
        """Return the largest ``read_bytes + write_bytes`` among ``disks``, in MB."""
        return max(
            (
                (disk.get('read_bytes', 0) + disk.get('write_bytes', 0))
                / GlancesMonitor.BYTES_PER_MB
                for disk in disks
            ),
            default=0,
        )

    def _check_cpu_and_load(self):
        """Check CPU usage and the 1-minute load average (runs every round)."""
        cpu_usage = self._get_json("cpu/total").get('total', 0)
        if cpu_usage > self.cpu_threshold:
            self.send_alert_email("CPU 使用率", cpu_usage, self.cpu_threshold)

        load_avg = self._get_json("load").get('min1', 0)
        if load_avg > self.load_threshold:
            self.send_alert_email("系统负载(1分钟)", load_avg, self.load_threshold)

    def _check_disk_and_processes(self):
        """Check disk I/O, filesystem usage and the process count."""
        max_io_usage = self._max_disk_io_mb(self._get_json("diskio"))
        if max_io_usage > self.io_threshold:
            self.send_alert_email("磁盘 I/O (MB/s)", max_io_usage, self.io_threshold)

        for fs in self._get_json("fs"):
            disk_usage = fs.get('percent', 0)
            if disk_usage > self.disk_usage_threshold:
                self.send_alert_email(f"磁盘占用 ({fs.get('mnt_point')})", disk_usage,
                                      self.disk_usage_threshold)

        # NOTE(review): the alert is labelled as a handle count, but the value
        # read here is the `total` of the processcount endpoint - confirm.
        handle_count = self._get_json("processcount").get('total', 0)
        if handle_count > self.handle_threshold:
            self.send_alert_email("句柄数", handle_count, self.handle_threshold)

    def monitor(self):
        """Poll the Glances API until ``stop()`` is called, alerting on breaches."""
        while self._running:
            try:
                self._loop_index += 1
                self._check_cpu_and_load()
                # NOTE(review): grouping of the disk/process checks under the
                # every-N-th-round branch is reconstructed - confirm.
                if self._loop_index % self.SLOW_CHECK_EVERY == 0:
                    self._check_disk_and_processes()
                time.sleep(self.monitor_interval)
            except requests.RequestException as e:
                logger.error(f"连接 Glances API 失败: {e}")
                time.sleep(self.ERROR_BACKOFF_SECONDS)
            except Exception as e:
                logger.error(f"监控错误: {e}")
                time.sleep(self.ERROR_BACKOFF_SECONDS)

    def run(self):
        """Start the Glances server and the monitoring thread (non-blocking)."""
        # Start Glances first so a failed launch does not leave the monitor
        # flagged as running without a polling thread.
        self.start_glances()
        self._running = True
        monitor_thread = threading.Thread(target=self.monitor, daemon=True)
        monitor_thread.start()

    def stop(self):
        """Stop the monitoring loop and the Glances server."""
        self._running = False
        self.stop_glances()

View File

@@ -4,6 +4,7 @@ from loguru import logger
import os
import time
import subprocess
import socket
from datetime import datetime
import platform
import psutil
@@ -23,6 +24,16 @@ system_bp = Blueprint('system', __name__)
# Timestamp recorded when the application module is loaded.
APP_START_TIME = time.time()
# Most recent network counter sample, used by the resource monitoring page to
# estimate upload/download rates.
# This is deliberately a lightweight, page-driven sample:
# 1. no extra daemon thread is started, so showing a rate does not introduce a
#    resident background task;
# 2. the rate is only computed when a user refreshes/polls the resource page,
#    so the overhead is close to zero;
# 3. if the process restarts and this cache is lost, only the first reported
#    rate reads 0, which does not affect overall usability.
NETWORK_IO_SAMPLE = {
    "timestamp": 0.0,
    "bytes_sent": 0,
    "bytes_recv": 0,
}
def _system_config_path() -> str:
@@ -68,6 +79,178 @@ def _format_bytes_to_mb(value: int) -> float:
return round((_safe_float(value, 0.0) / 1024 / 1024), 2)
def _safe_divide(numerator: float, denominator: float, default: float = 0.0) -> float:
"""安全除法,避免速率与占比计算时被 0 除打断。"""
try:
if not denominator:
return default
return numerator / denominator
except Exception:
return default
def _primary_disk_path() -> str:
"""返回当前系统最稳妥的主盘路径。"""
# 资源监控页既要兼容你本地 Windows 开发环境,也要兼容线上 Linux
# 1. 优先用系统根目录Linux 下是 /
# 2. Windows 下会自动变成当前盘符根路径;
# 3. 避免把磁盘路径硬编码成 /,导致本地调试时报错。
return os.path.abspath(os.sep)
def _format_datetime_text(timestamp_value: float | int | None) -> str:
"""把时间戳格式化为后台页面可直接展示的文本。"""
if not timestamp_value:
return "-"
try:
return datetime.fromtimestamp(float(timestamp_value)).strftime("%Y-%m-%d %H:%M:%S")
except Exception:
return "-"
def _sample_network_speed() -> dict:
    """Estimate upload/download rates from two consecutive page samples.

    Reads the current ``psutil.net_io_counters()`` totals, compares them with
    the previous sample stored in ``NETWORK_IO_SAMPLE``, then stores the new
    sample. The first call after start-up reports a rate of 0 because there
    is no earlier sample to compare against.

    Returns:
        dict: cumulative ``bytes_sent`` / ``bytes_recv`` plus
        ``upload_speed_bps`` / ``download_speed_bps`` rounded to 2 decimals.
    """
    io_totals = psutil.net_io_counters()
    sampled_at = time.time()
    sent_total = _safe_int(getattr(io_totals, "bytes_sent", 0))
    recv_total = _safe_int(getattr(io_totals, "bytes_recv", 0))

    previous_at = _safe_float(NETWORK_IO_SAMPLE.get("timestamp"))
    window = max(sampled_at - previous_at, 0.0)

    up_rate = down_rate = 0.0
    if previous_at > 0 and window > 0:
        sent_delta = sent_total - _safe_int(NETWORK_IO_SAMPLE.get("bytes_sent"))
        recv_delta = recv_total - _safe_int(NETWORK_IO_SAMPLE.get("bytes_recv"))
        up_rate = _safe_divide(sent_delta, window, 0.0)
        down_rate = _safe_divide(recv_delta, window, 0.0)
    # Never report a negative rate (e.g. when the counters went backwards).
    up_rate = max(up_rate, 0.0)
    down_rate = max(down_rate, 0.0)

    NETWORK_IO_SAMPLE.update(
        timestamp=sampled_at,
        bytes_sent=sent_total,
        bytes_recv=recv_total,
    )

    return {
        "bytes_sent": sent_total,
        "bytes_recv": recv_total,
        "upload_speed_bps": round(up_rate, 2),
        "download_speed_bps": round(down_rate, 2),
    }
# psutil handle for the current process, keyed by PID so that a forked worker
# never reuses the handle created by its parent.
_CURRENT_PROCESS_HANDLE_CACHE = {}


def _get_cached_current_process():
    """Return a ``psutil.Process`` for this PID that is reused across calls.

    ``Process.cpu_percent(interval=None)`` measures against the previous call
    on the same object and returns 0.0 on the first call, so building a new
    ``Process`` per request would report 0.0 every time.
    """
    pid = os.getpid()
    handle = _CURRENT_PROCESS_HANDLE_CACHE.get(pid)
    if handle is None:
        _CURRENT_PROCESS_HANDLE_CACHE.clear()
        handle = psutil.Process(pid)
        _CURRENT_PROCESS_HANDLE_CACHE[pid] = handle
    return handle


def _collect_mounted_disk_usage() -> list:
    """Return usage entries for each distinct mount point, fullest first."""
    disk_items = []
    seen_mountpoints = set()
    for partition in psutil.disk_partitions(all=False):
        mountpoint = str(getattr(partition, "mountpoint", "") or "").strip()
        if not mountpoint or mountpoint in seen_mountpoints:
            continue
        seen_mountpoints.add(mountpoint)
        try:
            usage = psutil.disk_usage(mountpoint)
        except Exception:
            # Best effort: a mount point that cannot be read is left out.
            continue
        disk_items.append({
            "device": str(getattr(partition, "device", "") or "").strip() or mountpoint,
            "mountpoint": mountpoint,
            "fstype": str(getattr(partition, "fstype", "") or "").strip(),
            "total_bytes": _safe_int(getattr(usage, "total", 0)),
            "used_bytes": _safe_int(getattr(usage, "used", 0)),
            "free_bytes": _safe_int(getattr(usage, "free", 0)),
            "usage_percent": round(_safe_float(getattr(usage, "percent", 0.0)), 1),
        })
    disk_items.sort(key=lambda item: item.get("usage_percent", 0.0), reverse=True)
    return disk_items


def _extract_server_runtime_snapshot() -> dict:
    """Build the lightweight server runtime snapshot shown on the resource page.

    The snapshot covers host resources (CPU, memory, disk, network), the
    current application process, and the MySQL / Redis summaries produced by
    the existing infrastructure probes. It is collected with psutil inside
    the web process, so no separate glances process is needed.

    Returns:
        dict: sections ``timestamp``, ``server``, ``cpu``, ``memory``,
        ``disk``, ``network``, ``process`` and ``infrastructure``.
    """
    server = current_app.dashboard_server
    current_process = _get_cached_current_process()
    now = time.time()

    virtual_memory = psutil.virtual_memory()
    swap_memory = psutil.swap_memory()
    cpu_usage = psutil.cpu_percent(interval=None)
    process_cpu_usage = current_process.cpu_percent(interval=None)
    boot_time = psutil.boot_time()
    network_sample = _sample_network_speed()
    disk_io = psutil.disk_io_counters()  # may be None; handled below

    try:
        load_values = os.getloadavg()
    except (AttributeError, OSError):
        # os.getloadavg is not available on every platform.
        load_values = (0.0, 0.0, 0.0)

    disk_items = _collect_mounted_disk_usage()

    try:
        primary_disk_usage = psutil.disk_usage(_primary_disk_path())
    except OSError:
        # Keep the rest of the snapshot usable; the getattr defaults below
        # turn a missing result into zeros.
        primary_disk_usage = None

    process_memory = current_process.memory_info()
    process_created_at = current_process.create_time()

    try:
        open_files = len(current_process.open_files())
    except Exception:
        open_files = 0

    try:
        # psutil 6 renamed Process.connections() to net_connections(); prefer
        # the new name and fall back to the old one on earlier versions.
        list_connections = getattr(current_process, "net_connections", None) or current_process.connections
        inet_connections = list_connections(kind="inet")
        established_connections = sum(
            1 for conn in inet_connections
            if str(getattr(conn, "status", "") or "").upper() == "ESTABLISHED"
        )
    except Exception:
        established_connections = 0

    return {
        "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
        "server": {
            "hostname": socket.gethostname(),
            "os": platform.system(),
            "os_version": platform.version(),
            "python_version": platform.python_version(),
            "boot_time": _format_datetime_text(boot_time),
            "uptime_seconds": round(max(now - boot_time, 0), 2),
        },
        "cpu": {
            "usage_percent": round(cpu_usage, 1),
            "logical_count": psutil.cpu_count(logical=True) or 0,
            "physical_count": psutil.cpu_count(logical=False) or 0,
            "load_1": round(_safe_float(load_values[0]), 2),
            "load_5": round(_safe_float(load_values[1]), 2),
            "load_15": round(_safe_float(load_values[2]), 2),
        },
        "memory": {
            "usage_percent": round(_safe_float(getattr(virtual_memory, "percent", 0.0)), 1),
            "total_bytes": _safe_int(getattr(virtual_memory, "total", 0)),
            "used_bytes": _safe_int(getattr(virtual_memory, "used", 0)),
            "available_bytes": _safe_int(getattr(virtual_memory, "available", 0)),
            "swap_usage_percent": round(_safe_float(getattr(swap_memory, "percent", 0.0)), 1),
            "swap_total_bytes": _safe_int(getattr(swap_memory, "total", 0)),
            "swap_used_bytes": _safe_int(getattr(swap_memory, "used", 0)),
        },
        "disk": {
            "primary_usage_percent": round(_safe_float(getattr(primary_disk_usage, "percent", 0.0)), 1),
            "primary_total_bytes": _safe_int(getattr(primary_disk_usage, "total", 0)),
            "primary_used_bytes": _safe_int(getattr(primary_disk_usage, "used", 0)),
            "io_read_bytes": _safe_int(getattr(disk_io, "read_bytes", 0)) if disk_io else 0,
            "io_write_bytes": _safe_int(getattr(disk_io, "write_bytes", 0)) if disk_io else 0,
            # Only the eight fullest mount points are sent to the page.
            "items": disk_items[:8],
        },
        "network": {
            **network_sample,
            "established_connections": established_connections,
        },
        "process": {
            "pid": current_process.pid,
            "cpu_percent": round(process_cpu_usage, 1),
            "memory_percent": round(current_process.memory_percent(), 2),
            "memory_rss_bytes": _safe_int(getattr(process_memory, "rss", 0)),
            "thread_count": current_process.num_threads(),
            "open_files": open_files,
            "create_time": _format_datetime_text(process_created_at),
            "uptime_seconds": round(max(now - process_created_at, 0), 2),
        },
        "infrastructure": {
            "mysql": _extract_mysql_runtime_snapshot(server.db_manager),
            "redis": _extract_redis_runtime_snapshot(server.db_manager),
        },
    }
def _extract_mysql_runtime_snapshot(db_manager) -> dict:
"""采集 MySQL 运行态摘要。
@@ -761,17 +944,11 @@ def api_docs():
@system_bp.route('/system_status')
@login_required
def system_status():
    """Render the built-in resource monitoring page."""
    # The page is a lightweight panel bundled with the project:
    # 1. it no longer depends on a separate glances process;
    # 2. it only consumes this service's own API;
    # 3. deployments do not need to expose an extra port such as 61208.
    return render_template('system_status.html')
@system_bp.route('/system_llm')
@@ -811,6 +988,20 @@ def api_system_info():
return jsonify({"success": False, "error": str(e)}), 500
@system_bp.route('/api/system_status_overview')
@login_required
def api_system_status_overview():
    """Return the lightweight server snapshot consumed by the resource page.

    Responds with ``{"success": True, "data": ...}`` on success and with
    ``{"success": False, "error": ...}`` and HTTP 500 when collection fails.
    """
    try:
        return jsonify({
            "success": True,
            "data": _extract_server_runtime_snapshot(),
        })
    except Exception as e:
        # logger.exception keeps the traceback, which a plain error log drops.
        logger.exception(f"获取资源监控快照失败: {e}")
        return jsonify({"success": False, "error": str(e)}), 500
@system_bp.route('/api/system_health_summary')
@login_required
def api_system_health_summary():

View File

@@ -6,9 +6,23 @@
<div class="page-shell system-page">
<div class="page-hero">
<div class="page-hero-copy">
<div class="page-eyebrow">System Workspace</div>
<div class="page-eyebrow">Resource Overview</div>
<h1>资源监控</h1>
<p>直接在后台查看系统资源变化与运行状态,保持监控入口简洁清晰</p>
<p>直接观察 ABOT 所在服务器的关键资源、应用进程和基础设施状态,不再依赖额外的 glances 进程</p>
<div class="hero-meta-row">
<div class="hero-meta-pill">
<span class="hero-meta-label">最近刷新</span>
<span class="hero-meta-value">{% raw %}{{ statusOverview.timestamp || '-' }}{% endraw %}</span>
</div>
<div class="hero-meta-pill" v-if="statusOverview.server">
<span class="hero-meta-label">主机</span>
<span class="hero-meta-value">{% raw %}{{ statusOverview.server.hostname || '-' }}{% endraw %}</span>
</div>
<div class="hero-meta-pill" v-if="statusOverview.process">
<span class="hero-meta-label">PID</span>
<span class="hero-meta-value">{% raw %}{{ statusOverview.process.pid || '-' }}{% endraw %}</span>
</div>
</div>
<div class="md2img-health-inline" v-loading="md2imgLoading">
<span class="health-title">转图运行时</span>
<el-tag size="mini" :type="runtimeTagType">{% raw %}{{ runtimeTagText }}{% endraw %}</el-tag>
@@ -23,27 +37,221 @@
<el-button type="success" plain :loading="md2imgWarming" @click="warmupMd2Img">
<i class="el-icon-magic-stick"></i> 预热转图
</el-button>
<el-button type="info" plain :loading="md2imgLoading" @click="loadMd2ImgHealth">
<i class="el-icon-refresh"></i> 刷新转图状态
<el-button type="info" plain :loading="loading" @click="loadStatusOverview(true)">
<i class="el-icon-refresh"></i> 刷新监控
</el-button>
<el-button type="danger" @click="confirmRestart">
<i class="el-icon-refresh-left"></i> 重启服务
</el-button>
<el-button type="primary" plain @click="reloadIframe"><i class="el-icon-refresh"></i> 刷新面板</el-button>
<el-button type="primary" @click="openInNewTab"><i class="el-icon-top-right"></i> 新窗口打开</el-button>
<el-button type="danger" @click="confirmRestart"><i class="el-icon-refresh-left"></i> 重启服务</el-button>
</div>
</div>
<el-card class="iframe-shell-card" shadow="hover">
<div slot="header" class="workspace-header">
<div>
<h3>监控面板</h3>
<p>直接在控制台内查看系统资源变化与运行状态。</p>
<div class="stats-grid">
<el-card class="stat-card" shadow="hover">
<div class="stat-card__label">CPU 使用率</div>
<div class="stat-card__value">{% raw %}{{ formatPercent(cpu.usage_percent) }}{% endraw %}</div>
<div class="stat-card__meta">{% raw %}{{ cpu.logical_count || 0 }}{% endraw %} 线程 · load1 {% raw %}{{ formatNumber(cpu.load_1, 2) }}{% endraw %}</div>
<el-progress :percentage="normalizePercent(cpu.usage_percent)" :stroke-width="10" :status="progressStatus(cpu.usage_percent, 65, 85)"></el-progress>
</el-card>
<el-card class="stat-card" shadow="hover">
<div class="stat-card__label">内存使用率</div>
<div class="stat-card__value">{% raw %}{{ formatPercent(memory.usage_percent) }}{% endraw %}</div>
<div class="stat-card__meta">{% raw %}{{ formatBytes(memory.used_bytes) }}{% endraw %} / {% raw %}{{ formatBytes(memory.total_bytes) }}{% endraw %}</div>
<el-progress :percentage="normalizePercent(memory.usage_percent)" :stroke-width="10" :status="progressStatus(memory.usage_percent, 70, 88)"></el-progress>
</el-card>
<el-card class="stat-card" shadow="hover">
<div class="stat-card__label">主盘使用率</div>
<div class="stat-card__value">{% raw %}{{ formatPercent(disk.primary_usage_percent) }}{% endraw %}</div>
<div class="stat-card__meta">{% raw %}{{ formatBytes(disk.primary_used_bytes) }}{% endraw %} / {% raw %}{{ formatBytes(disk.primary_total_bytes) }}{% endraw %}</div>
<el-progress :percentage="normalizePercent(disk.primary_usage_percent)" :stroke-width="10" :status="progressStatus(disk.primary_usage_percent, 75, 90)"></el-progress>
</el-card>
<el-card class="stat-card" shadow="hover">
<div class="stat-card__label">网络速率</div>
<div class="stat-card__value">{% raw %}{{ formatSpeed(network.download_speed_bps) }}{% endraw %}</div>
<div class="stat-card__meta">上行 {% raw %}{{ formatSpeed(network.upload_speed_bps) }}{% endraw %} · 连接 {% raw %}{{ network.established_connections || 0 }}{% endraw %}</div>
<div class="network-balance">
<span>累计上行 {% raw %}{{ formatBytes(network.bytes_sent) }}{% endraw %}</span>
<span>累计下行 {% raw %}{{ formatBytes(network.bytes_recv) }}{% endraw %}</span>
</div>
<div class="iframe-url">{{ src_url }}</div>
</div>
<div class="iframe-shell">
<iframe ref="monitorFrame" src="{{ src_url }}" frameborder="0"></iframe>
</div>
</el-card>
</el-card>
</div>
<div class="monitor-grid">
<el-card class="workspace-card" shadow="hover">
<div slot="header" class="workspace-header">
<div>
<h3>系统概览</h3>
<p>观察主机、Python 运行环境和负载情况,快速判断是不是机器层面的问题。</p>
</div>
</div>
<div class="detail-grid">
<div class="detail-item">
<span class="detail-item__label">主机名</span>
<span class="detail-item__value">{% raw %}{{ server.hostname || '-' }}{% endraw %}</span>
</div>
<div class="detail-item">
<span class="detail-item__label">操作系统</span>
<span class="detail-item__value">{% raw %}{{ server.os || '-' }}{% endraw %}</span>
</div>
<div class="detail-item detail-item--wide">
<span class="detail-item__label">系统版本</span>
<span class="detail-item__value">{% raw %}{{ server.os_version || '-' }}{% endraw %}</span>
</div>
<div class="detail-item">
<span class="detail-item__label">Python</span>
<span class="detail-item__value">{% raw %}{{ server.python_version || '-' }}{% endraw %}</span>
</div>
<div class="detail-item">
<span class="detail-item__label">开机时间</span>
<span class="detail-item__value">{% raw %}{{ server.boot_time || '-' }}{% endraw %}</span>
</div>
<div class="detail-item">
<span class="detail-item__label">系统运行时长</span>
<span class="detail-item__value">{% raw %}{{ formatDuration(server.uptime_seconds) }}{% endraw %}</span>
</div>
<div class="detail-item">
<span class="detail-item__label">Load 1 / 5 / 15</span>
<span class="detail-item__value">{% raw %}{{ formatNumber(cpu.load_1, 2) }} / {{ formatNumber(cpu.load_5, 2) }} / {{ formatNumber(cpu.load_15, 2) }}{% endraw %}</span>
</div>
<div class="detail-item">
<span class="detail-item__label">CPU 核心</span>
<span class="detail-item__value">{% raw %}{{ cpu.physical_count || 0 }}{% endraw %} 物理 / {% raw %}{{ cpu.logical_count || 0 }}{% endraw %} 逻辑</span>
</div>
<div class="detail-item">
<span class="detail-item__label">Swap 使用率</span>
<span class="detail-item__value">{% raw %}{{ formatPercent(memory.swap_usage_percent) }}{% endraw %}</span>
</div>
</div>
</el-card>
<el-card class="workspace-card" shadow="hover">
<div slot="header" class="workspace-header">
<div>
<h3>ABOT 进程</h3>
<p>确认当前应用自身的资源占用,避免只看主机而忽略进程级热点。</p>
</div>
</div>
<div class="detail-grid">
<div class="detail-item">
<span class="detail-item__label">PID</span>
<span class="detail-item__value">{% raw %}{{ process.pid || '-' }}{% endraw %}</span>
</div>
<div class="detail-item">
<span class="detail-item__label">进程 CPU</span>
<span class="detail-item__value">{% raw %}{{ formatPercent(process.cpu_percent) }}{% endraw %}</span>
</div>
<div class="detail-item">
<span class="detail-item__label">进程内存</span>
<span class="detail-item__value">{% raw %}{{ formatBytes(process.memory_rss_bytes) }}{% endraw %}</span>
</div>
<div class="detail-item">
<span class="detail-item__label">内存占比</span>
<span class="detail-item__value">{% raw %}{{ formatPercent(process.memory_percent) }}{% endraw %}</span>
</div>
<div class="detail-item">
<span class="detail-item__label">线程数</span>
<span class="detail-item__value">{% raw %}{{ process.thread_count || 0 }}{% endraw %}</span>
</div>
<div class="detail-item">
<span class="detail-item__label">打开文件</span>
<span class="detail-item__value">{% raw %}{{ process.open_files || 0 }}{% endraw %}</span>
</div>
<div class="detail-item detail-item--wide">
<span class="detail-item__label">进程启动时间</span>
<span class="detail-item__value">{% raw %}{{ process.create_time || '-' }}{% endraw %}</span>
</div>
<div class="detail-item detail-item--wide">
<span class="detail-item__label">进程运行时长</span>
<span class="detail-item__value">{% raw %}{{ formatDuration(process.uptime_seconds) }}{% endraw %}</span>
</div>
</div>
</el-card>
<el-card class="workspace-card workspace-card--wide" shadow="hover">
<div slot="header" class="workspace-header">
<div>
<h3>基础设施运行态</h3>
<p>把数据库和缓存的关键摘要放在同一屏里,日常看状态不需要再跳出去。</p>
</div>
</div>
<div class="infra-grid">
<div class="infra-panel" :class="`infra-panel--${infrastructure.mysql.status || 'warning'}`">
<div class="infra-panel__head">
<div>
<div class="infra-panel__title">MySQL</div>
<div class="infra-panel__summary">{% raw %}{{ infrastructure.mysql.summary || '暂无状态' }}{% endraw %}</div>
</div>
<el-tag size="mini" :type="tagType(infrastructure.mysql.status)">{% raw %}{{ statusText(infrastructure.mysql.status) }}{% endraw %}</el-tag>
</div>
<div class="infra-metrics">
<div class="infra-metric"><span>数据库</span><strong>{% raw %}{{ infrastructure.mysql.database || '-' }}{% endraw %}</strong></div>
<div class="infra-metric"><span>版本</span><strong>{% raw %}{{ infrastructure.mysql.version || '-' }}{% endraw %}</strong></div>
<div class="infra-metric"><span>连接数</span><strong>{% raw %}{{ infrastructure.mysql.threads_connected || 0 }}{% endraw %} / {% raw %}{{ infrastructure.mysql.max_connections || '-' }}{% endraw %}</strong></div>
<div class="infra-metric"><span>连接负载</span><strong>{% raw %}{{ formatPercent(infrastructure.mysql.connection_usage_percent) }}{% endraw %}</strong></div>
<div class="infra-metric"><span>运行线程</span><strong>{% raw %}{{ infrastructure.mysql.threads_running || 0 }}{% endraw %}</strong></div>
<div class="infra-metric"><span>QPS</span><strong>{% raw %}{{ formatNumber(infrastructure.mysql.questions_per_second, 2) }}{% endraw %}</strong></div>
<div class="infra-metric"><span>表数量</span><strong>{% raw %}{{ infrastructure.mysql.table_count || 0 }}{% endraw %}</strong></div>
<div class="infra-metric"><span>库体量</span><strong>{% raw %}{{ formatMb(infrastructure.mysql.schema_size_mb) }}{% endraw %}</strong></div>
</div>
</div>
<div class="infra-panel" :class="`infra-panel--${infrastructure.redis.status || 'warning'}`">
<div class="infra-panel__head">
<div>
<div class="infra-panel__title">Redis</div>
<div class="infra-panel__summary">{% raw %}{{ infrastructure.redis.summary || '暂无状态' }}{% endraw %}</div>
</div>
<el-tag size="mini" :type="tagType(infrastructure.redis.status)">{% raw %}{{ statusText(infrastructure.redis.status) }}{% endraw %}</el-tag>
</div>
<div class="infra-metrics">
<div class="infra-metric"><span>DB</span><strong>{% raw %}{{ infrastructure.redis.db_index || 0 }}{% endraw %}</strong></div>
<div class="infra-metric"><span>Key 数量</span><strong>{% raw %}{{ infrastructure.redis.key_count || 0 }}{% endraw %}</strong></div>
<div class="infra-metric"><span>客户端</span><strong>{% raw %}{{ infrastructure.redis.connected_clients || 0 }}{% endraw %}</strong></div>
<div class="infra-metric"><span>阻塞客户端</span><strong>{% raw %}{{ infrastructure.redis.blocked_clients || 0 }}{% endraw %}</strong></div>
<div class="infra-metric"><span>OPS/s</span><strong>{% raw %}{{ infrastructure.redis.ops_per_sec || 0 }}{% endraw %}</strong></div>
<div class="infra-metric"><span>命中率</span><strong>{% raw %}{{ formatPercent(infrastructure.redis.hit_rate_percent) }}{% endraw %}</strong></div>
<div class="infra-metric"><span>已用内存</span><strong>{% raw %}{{ infrastructure.redis.used_memory_human || '-' }}{% endraw %}</strong></div>
<div class="infra-metric"><span>峰值内存</span><strong>{% raw %}{{ infrastructure.redis.used_memory_peak_human || '-' }}{% endraw %}</strong></div>
</div>
</div>
</div>
</el-card>
<el-card class="workspace-card workspace-card--wide" shadow="hover">
<div slot="header" class="workspace-header">
<div>
<h3>磁盘挂载点</h3>
<p>按使用率排序展示常用挂载点,方便快速发现哪个分区快满了。</p>
</div>
</div>
<el-table :data="disk.items || []" size="mini" style="width: 100%">
<el-table-column prop="mountpoint" label="挂载点" min-width="180"></el-table-column>
<el-table-column prop="device" label="设备" min-width="180"></el-table-column>
<el-table-column prop="fstype" label="文件系统" width="110"></el-table-column>
<el-table-column label="已用 / 总量" min-width="170">
<template slot-scope="scope">
{% raw %}{{ formatBytes(scope.row.used_bytes) }} / {{ formatBytes(scope.row.total_bytes) }}{% endraw %}
</template>
</el-table-column>
<el-table-column label="使用率" min-width="180">
<template slot-scope="scope">
<div class="table-progress-cell">
<span>{% raw %}{{ formatPercent(scope.row.usage_percent) }}{% endraw %}</span>
<el-progress
:percentage="normalizePercent(scope.row.usage_percent)"
:stroke-width="8"
:status="progressStatus(scope.row.usage_percent, 75, 90)">
</el-progress>
</div>
</template>
</el-table-column>
<el-table-column label="剩余空间" min-width="120">
<template slot-scope="scope">
{% raw %}{{ formatBytes(scope.row.free_bytes) }}{% endraw %}
</template>
</el-table-column>
</el-table>
</el-card>
</div>
</div>
{% endblock %}
@@ -55,14 +263,49 @@
data() {
return {
currentView: '14',
frameUrl: '{{ src_url }}',
restarting: false,
loading: false,
md2imgLoading: false,
md2imgWarming: false,
md2imgHealth: null
autoRefreshTimer: null,
md2imgHealth: null,
statusOverview: {
timestamp: '',
server: {},
cpu: {},
memory: {},
disk: { items: [] },
network: {},
process: {},
infrastructure: {
mysql: {},
redis: {}
}
}
}
},
computed: {
server() {
return this.statusOverview.server || {};
},
cpu() {
return this.statusOverview.cpu || {};
},
memory() {
return this.statusOverview.memory || {};
},
disk() {
return this.statusOverview.disk || { items: [] };
},
network() {
return this.statusOverview.network || {};
},
process() {
return this.statusOverview.process || {};
},
infrastructure() {
return this.statusOverview.infrastructure || { mysql: {}, redis: {} };
},
runtimeTagText() {
const runtime = this.md2imgHealth && this.md2imgHealth.runtime ? this.md2imgHealth.runtime : {};
return runtime.loop_running ? '运行时在线' : '运行时未就绪';
@@ -73,7 +316,7 @@
},
browserTagText() {
const browser = this.md2imgHealth && this.md2imgHealth.browser ? this.md2imgHealth.browser : {};
return browser.connected ? '浏览器已连接' : '浏览器未连接';
return browser.connected ? '浏览器已连接' : 'info';
},
browserTagType() {
const browser = this.md2imgHealth && this.md2imgHealth.browser ? this.md2imgHealth.browser : {};
@@ -91,16 +334,106 @@
},
mounted() {
this.currentView = '14';
this.loadStatusOverview(false);
this.loadMd2ImgHealth();
// 资源页采用前端轮询即可满足日常观察,不需要为了监控再起单独守护进程。
this.autoRefreshTimer = setInterval(() => {
this.loadStatusOverview(false);
}, 10000);
},
beforeDestroy() {
if (this.autoRefreshTimer) {
clearInterval(this.autoRefreshTimer);
this.autoRefreshTimer = null;
}
},
methods: {
reloadIframe() {
if (this.$refs.monitorFrame) {
this.$refs.monitorFrame.src = this.frameUrl;
async loadStatusOverview(showToast) {
if (this.loading) return;
this.loading = true;
try {
const response = await axios.get('/api/system_status_overview');
if (response.data && response.data.success) {
this.statusOverview = response.data.data || this.statusOverview;
if (showToast) {
this.$message.success('资源监控已刷新');
}
} else {
this.$message.error(response.data?.message || '获取资源监控失败');
}
} catch (error) {
this.$message.error(error.response?.data?.message || '获取资源监控失败');
} finally {
this.loading = false;
}
},
openInNewTab() {
window.open(this.frameUrl, '_blank');
normalizePercent(value) {
const number = Number(value || 0);
if (Number.isNaN(number)) return 0;
return Math.max(0, Math.min(100, Number(number.toFixed(1))));
},
progressStatus(value, warningThreshold, dangerThreshold) {
const percent = this.normalizePercent(value);
if (percent >= dangerThreshold) return 'exception';
if (percent >= warningThreshold) return 'warning';
return 'success';
},
formatNumber(value, digits = 0) {
if (value === null || value === undefined || value === '') return '-';
const number = Number(value);
if (Number.isNaN(number)) return String(value);
return number.toFixed(digits);
},
formatPercent(value) {
if (value === null || value === undefined || value === '') return '-';
return `${this.formatNumber(value, 1)}%`;
},
formatBytes(value) {
const bytes = Number(value || 0);
if (!Number.isFinite(bytes) || bytes <= 0) return '0 B';
const units = ['B', 'KB', 'MB', 'GB', 'TB'];
let size = bytes;
let index = 0;
while (size >= 1024 && index < units.length - 1) {
size /= 1024;
index += 1;
}
const digits = index === 0 ? 0 : (size >= 100 ? 0 : (size >= 10 ? 1 : 2));
return `${size.toFixed(digits)} ${units[index]}`;
},
formatSpeed(value) {
return `${this.formatBytes(value)}/s`;
},
formatDuration(seconds) {
const total = parseInt(seconds || 0, 10);
if (!total || total <= 0) return '-';
const days = Math.floor(total / 86400);
const hours = Math.floor((total % 86400) / 3600);
const minutes = Math.floor((total % 3600) / 60);
if (days > 0) return `${days}${hours}小时`;
if (hours > 0) return `${hours}小时 ${minutes}分钟`;
return `${minutes}分钟`;
},
formatMb(value) {
const number = Number(value || 0);
if (!Number.isFinite(number) || number <= 0) return '0 MB';
return `${number.toFixed(number >= 100 ? 0 : 1)} MB`;
},
statusText(status) {
const mapping = {
healthy: '健康',
warning: '关注',
danger: '异常'
};
return mapping[status] || '未知';
},
tagType(status) {
const mapping = {
healthy: 'success',
warning: 'warning',
danger: 'danger'
};
return mapping[status] || 'info';
},
confirmRestart() {
this.$confirm('确认执行 ./restart.sh 重启服务吗?这会中断当前服务几秒钟。', '重启确认', {
@@ -131,7 +464,6 @@
if (this.md2imgLoading) return;
this.md2imgLoading = true;
try {
// 默认不强制拉起 runtime避免纯查看状态时引入副作用。
const response = await axios.get('/api/system/md2img_health');
if (response.data && response.data.success) {
this.md2imgHealth = response.data.data || null;
@@ -170,44 +502,79 @@
.page-shell { display: flex; flex-direction: column; gap: 16px; }
.page-hero {
display: flex; align-items: flex-end; justify-content: space-between; gap: 18px; padding: 24px 26px; border-radius: 24px;
background: linear-gradient(135deg, rgba(79,70,229,0.10), rgba(59,130,246,0.08), rgba(255,255,255,0.9));
background: linear-gradient(135deg, rgba(15,23,42,0.05), rgba(59,130,246,0.10), rgba(255,255,255,0.95));
border: 1px solid rgba(148, 163, 184, 0.16); box-shadow: 0 18px 40px rgba(15, 23, 42, 0.06);
}
.page-hero-actions { display: flex; align-items: center; gap: 12px; }
.page-eyebrow { font-size: 12px; text-transform: uppercase; letter-spacing: .08em; color: #6366f1; font-weight: 700; margin-bottom: 8px; }
.page-hero-actions { display: flex; align-items: center; gap: 12px; flex-wrap: wrap; }
.page-eyebrow { font-size: 12px; text-transform: uppercase; letter-spacing: .08em; color: #2563eb; font-weight: 700; margin-bottom: 8px; }
.page-hero-copy h1 { font-size: 30px; line-height: 1.1; margin-bottom: 10px; color: #0f172a; }
.page-hero-copy p { color: #64748b; font-size: 14px; }
.hero-meta-row { display: flex; gap: 10px; flex-wrap: wrap; margin-top: 14px; }
.hero-meta-pill {
display: inline-flex; align-items: center; gap: 8px; padding: 8px 12px; border-radius: 999px;
background: rgba(255,255,255,0.8); border: 1px solid rgba(148,163,184,0.14); color: #475569; font-size: 12px;
}
.hero-meta-label { color: #94a3b8; }
.hero-meta-value { color: #0f172a; font-weight: 700; }
.md2img-health-inline {
margin-top: 12px;
display: flex;
align-items: center;
gap: 8px;
flex-wrap: wrap;
padding: 8px 10px;
border-radius: 12px;
background: rgba(255,255,255,0.72);
border: 1px solid rgba(148, 163, 184, 0.18);
margin-top: 12px; display: flex; align-items: center; gap: 8px; flex-wrap: wrap; padding: 8px 10px;
border-radius: 12px; background: rgba(255,255,255,0.72); border: 1px solid rgba(148, 163, 184, 0.18);
}
.health-title { font-size: 12px; font-weight: 700; color: #334155; }
.health-brief { font-size: 12px; color: #475569; }
.health-time { font-size: 12px; color: #94a3b8; }
/* Summary stat cards: four equal-width columns by default. */
.stats-grid { display: grid; grid-template-columns: repeat(4, minmax(0, 1fr)); gap: 16px; }
.stat-card { border-radius: 20px; }
.stat-card__label { color: #64748b; font-size: 13px; margin-bottom: 10px; }
.stat-card__value { color: #0f172a; font-size: 30px; font-weight: 700; line-height: 1; margin-bottom: 10px; }
/* min-height reserves one line so the card keeps its height when the meta text is empty. */
.stat-card__meta { color: #475569; font-size: 12px; margin-bottom: 12px; min-height: 18px; }
.network-balance { margin-top: 12px; display: flex; justify-content: space-between; gap: 10px; color: #64748b; font-size: 12px; }
/* Monitor panels: two equal-width columns by default. */
.monitor-grid { display: grid; grid-template-columns: repeat(2, minmax(0, 1fr)); gap: 16px; }
.workspace-card { border-radius: 20px; }
/* A --wide card spans every column of its parent grid. */
.workspace-card--wide { grid-column: 1 / -1; }
.workspace-header { display: flex; align-items: center; justify-content: space-between; gap: 16px; }
.workspace-header h3 { font-size: 18px; margin-bottom: 4px; }
.workspace-header p { font-size: 13px; color: #64748b; }
.iframe-url {
max-width: 40%; overflow: hidden; text-overflow: ellipsis; white-space: nowrap; font-size: 12px;
color: #94a3b8; padding: 8px 12px; border-radius: 999px; background: rgba(248,250,252,0.9); border: 1px solid rgba(148,163,184,0.12);
/* Detail items: label/value tiles in a two-column grid. */
.detail-grid { display: grid; grid-template-columns: repeat(2, minmax(0, 1fr)); gap: 12px; }
.detail-item {
display: flex; flex-direction: column; gap: 6px; padding: 14px 16px; border-radius: 16px;
background: rgba(248,250,252,0.82); border: 1px solid rgba(148,163,184,0.12);
}
/* A --wide item spans every column of its parent grid. */
.detail-item--wide { grid-column: 1 / -1; }
.detail-item__label { color: #64748b; font-size: 12px; }
/* break-all lets long unbroken values wrap instead of overflowing the tile. */
.detail-item__value { color: #0f172a; font-size: 15px; font-weight: 700; word-break: break-all; }
/* Infra panels: two-column grid of panels, each with a head and a metrics grid. */
.infra-grid { display: grid; grid-template-columns: repeat(2, minmax(0, 1fr)); gap: 14px; }
.infra-panel {
padding: 16px; border-radius: 18px; border: 1px solid rgba(148,163,184,0.12); background: rgba(248,250,252,0.86);
}
/* Status modifiers add a faint inset ring: green, amber and red respectively. */
.infra-panel--healthy { box-shadow: inset 0 0 0 1px rgba(16,185,129,0.08); }
.infra-panel--warning { box-shadow: inset 0 0 0 1px rgba(245,158,11,0.10); }
.infra-panel--danger { box-shadow: inset 0 0 0 1px rgba(239,68,68,0.10); }
.infra-panel__head { display: flex; align-items: flex-start; justify-content: space-between; gap: 12px; margin-bottom: 14px; }
.infra-panel__title { color: #0f172a; font-size: 16px; font-weight: 700; }
.infra-panel__summary { color: #64748b; font-size: 12px; line-height: 1.6; margin-top: 4px; }
.infra-metrics { display: grid; grid-template-columns: repeat(2, minmax(0, 1fr)); gap: 10px 12px; }
.infra-metric {
display: flex; flex-direction: column; gap: 4px; padding: 10px 12px; border-radius: 14px;
background: rgba(255,255,255,0.75); border: 1px solid rgba(148,163,184,0.10);
}
.infra-metric span { color: #64748b; font-size: 12px; }
.infra-metric strong { color: #0f172a; font-size: 14px; font-weight: 700; word-break: break-word; }
/* Table cell layout: a fixed 60px first column, the second column takes the remaining width. */
.table-progress-cell { display: grid; grid-template-columns: 60px 1fr; align-items: center; gap: 10px; }
/* At 1200px wide or less: stat cards drop to two columns; monitor and infra grids become a single column. */
@media (max-width: 1200px) {
.stats-grid { grid-template-columns: repeat(2, minmax(0, 1fr)); }
.monitor-grid { grid-template-columns: 1fr; }
.infra-grid { grid-template-columns: 1fr; }
}
.iframe-shell-card { height: calc(100vh - 230px); }
.iframe-shell-card .el-card__body { height: calc(100% - 73px); }
.iframe-shell { height: 100%; border-radius: 18px; overflow: hidden; border: 1px solid rgba(148,163,184,0.12); background: rgba(248,250,252,0.82); }
.iframe-shell iframe { width: 100%; height: 100%; border: none; display: block; background: #fff; }
@media (max-width: 960px) {
.page-hero { flex-direction: column; align-items: flex-start; }
.workspace-header { flex-direction: column; align-items: flex-start; }
.page-hero-actions { flex-wrap: wrap; }
.iframe-url { max-width: 100%; }
.md2img-health-inline { width: 100%; }
.detail-grid { grid-template-columns: 1fr; }
.detail-item--wide { grid-column: auto; }
}
/* At 640px wide or less: stat cards and infra metrics become a single column; the network row stacks vertically. */
@media (max-width: 640px) {
.stats-grid { grid-template-columns: 1fr; }
.network-balance { flex-direction: column; }
.infra-metrics { grid-template-columns: 1fr; }
}
</style>
{% endblock %}

18
main.py
View File

@@ -3,7 +3,6 @@
import asyncio
import threading
from admin.GlancesMonitor import GlancesMonitor
from utils.decorator.async_job import async_job
from utils.markdown_to_image import warmup_md2img_browser
from configuration import Config
@@ -126,23 +125,6 @@ def main():
robot.LOG.info(f"Dashboard服务器已在 http://{dashboard_server.host}:{dashboard_server.port} 启动")
except Exception as e:
robot.LOG.error(f"Dashboard服务器启动失败: {e}")
try:
robot.LOG.debug(f"开始启动GlancesMonitor")
# 初始化 Glances 监控
monitor = GlancesMonitor(
email_sender=robot.email_sender,
host=config.glances.get("host"),
port=config.glances.get("port"),
cpu_threshold=80.0,
load_threshold=16, # 自动设为 CPU 核心数 * 2
io_threshold=100.0,
disk_usage_threshold=70.0,
handle_threshold=20000,
recipient=config.email.get("alert_recipient")
)
monitor.run()
except Exception as e:
robot.LOG.error(f"GlancesMonitor服务器启动失败: {e}")
# 启动后在“调度器同一事件循环”中预热 Markdown 转图浏览器。
# 这样可确保预热得到的常驻浏览器与后续截图任务复用同一 loop，避免跨 loop 句柄失效。