移除Glances并改造内置资源监控页面

This commit is contained in:
liuwei
2026-05-06 10:32:58 +08:00
parent e414562378
commit 9f3f6ffbae
4 changed files with 619 additions and 236 deletions

View File

@@ -1,157 +0,0 @@
import time
import threading
import subprocess
import requests
from loguru import logger
class GlancesMonitor:
    """Poll a Glances web server and e-mail alerts when thresholds are exceeded.

    The monitor can launch ``glances -w`` as a child process, then polls the
    Glances REST API from a daemon thread and sends at most one alert e-mail
    per metric per hour through the supplied ``email_sender``.
    """

    # Divisor applied to ``read_bytes + write_bytes`` before comparing with
    # ``io_threshold``.
    # NOTE(review): 2048 * 1024 is 2 MiB, not 1 MiB. Presumably this folds a
    # 2-second Glances refresh interval into the MB/s conversion -- confirm.
    _IO_DIVISOR = 2048 * 1024

    # Seconds to back off after a failed polling round.
    _ERROR_BACKOFF_SECONDS = 60

    # Minimum number of seconds between two alert e-mails for the same metric.
    _ALERT_COOLDOWN_SECONDS = 3600

    def __init__(self, email_sender, host='192.168.2.170', port=61208,
                 cpu_threshold=80.0, load_threshold=None, io_threshold=80.0,
                 disk_usage_threshold=80.0, handle_threshold=20000,
                 monitor_interval=30, recipient=None, request_timeout=5.0):
        """Initialise the Glances monitor.

        Args:
            email_sender: Initialised sender object exposing
                ``send_email(recipient, subject, body)``.
            host (str): Host name or address of the Glances server.
            port (int): Port of the Glances web server.
            cpu_threshold (float): CPU usage threshold (%).
            load_threshold (float): 1-minute load threshold. When falsy it
                defaults to twice the CPU count reported by Glances.
            io_threshold (float): Disk I/O threshold (compared with the value
                computed in ``monitor``).
            disk_usage_threshold (float): Filesystem usage threshold (%).
            handle_threshold (int): Threshold compared with the ``total``
                field of the ``/processcount`` endpoint.
            monitor_interval (int): Seconds to sleep between polling rounds.
            recipient (str): Address that receives alert e-mails.
            request_timeout (float): Timeout in seconds for every HTTP call
                to the Glances API.
        """
        self.host = host
        self.port = port
        self.api_url = f"http://{self.host}:{self.port}/api/4"
        # Must be set before get_cpu_count() may be called below.
        self.request_timeout = request_timeout
        self.cpu_threshold = cpu_threshold
        # get_cpu_count() queries the API during construction, i.e. before
        # run() has started Glances; it falls back to 1 when unreachable.
        self.load_threshold = load_threshold or (self.get_cpu_count() * 2)
        self.io_threshold = io_threshold
        self.disk_usage_threshold = disk_usage_threshold
        self.handle_threshold = handle_threshold
        self.email_sender = email_sender
        self.recipient = recipient
        self.glances_process = None
        self.last_alert_times = {}
        self._running = False
        self.monitor_interval = monitor_interval
        self._loop_index = 0

    def _get_json(self, endpoint):
        """Fetch ``<api_url>/<endpoint>`` and return the decoded JSON body.

        Raises:
            requests.RequestException: On connection errors, timeouts or a
                non-2xx response.
        """
        response = requests.get(f"{self.api_url}/{endpoint}",
                                timeout=self.request_timeout)
        response.raise_for_status()
        return response.json()

    def get_cpu_count(self):
        """Return the CPU count reported by Glances, or 1 on any failure."""
        try:
            # NOTE(review): this relies on a 'count' key in the /cpu payload;
            # verify the key name against the deployed Glances version.
            return self._get_json("cpu").get('count', 1)
        except Exception as e:
            logger.error(e)
            return 1

    def start_glances(self):
        """Start the Glances web server as a child process.

        Raises:
            subprocess.CalledProcessError: ``glances --version`` failed.
            FileNotFoundError: The ``glances`` executable is not installed.
            RuntimeError: The server process exited right after launch.
        """
        try:
            subprocess.run(['glances', '--version'], check=True,
                           stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            # The server's output is never read, so it is discarded instead
            # of piped: an unread pipe can fill up and block the child.
            self.glances_process = subprocess.Popen(
                ['glances', '-w', '--port', str(self.port)],
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
            )
            # Give the server a moment, then make sure it is still alive.
            time.sleep(2)
            if self.glances_process.poll() is not None:
                raise RuntimeError("Glances 启动失败")
            logger.info(f"Glances Web 服务已启动: http://{self.host}:{self.port}")
        except (subprocess.CalledProcessError, FileNotFoundError):
            # A missing executable surfaces as FileNotFoundError, not as
            # CalledProcessError, so both map to the "not installed" hint.
            logger.error("错误: Glances 未安装。请运行: python3.11 -m pip install glances")
            raise
        except Exception as e:
            logger.error(f"启动 Glances 失败: {e}")
            raise

    def stop_glances(self):
        """Terminate the Glances child process if one was started."""
        if not self.glances_process:
            return
        self.glances_process.terminate()
        try:
            self.glances_process.wait(timeout=10)
        except subprocess.TimeoutExpired:
            # The process ignored SIGTERM; force it down so stop() returns.
            self.glances_process.kill()
            self.glances_process.wait()
        self.glances_process = None
        # A clean shutdown is routine, so it is logged at INFO, not ERROR.
        logger.info("Glances Web 服务已停止")

    def send_alert_email(self, metric, value, threshold):
        """Send an alert e-mail for ``metric``, at most once per hour.

        Does nothing when no sender or recipient is configured. The cooldown
        timestamp is only recorded when ``send_email`` returns a truthy value.
        """
        if not self.email_sender or not self.recipient:
            return
        current_time = time.time()
        last_alert_time = self.last_alert_times.get(metric, 0)
        if current_time - last_alert_time < self._ALERT_COOLDOWN_SECONDS:
            return
        subject = f"服务器告警: {metric} 过高"
        body = f"警告: {metric} 当前值为 {value},超过阈值 {threshold}\n时间: {time.strftime('%Y-%m-%d %H:%M:%S', time.localtime())}"
        if self.email_sender.send_email(self.recipient, subject, body):
            self.last_alert_times[metric] = current_time

    def _check_cpu_and_load(self):
        """Compare CPU usage and 1-minute load with their thresholds."""
        cpu_usage = self._get_json("cpu/total").get('total', 0)
        if cpu_usage > self.cpu_threshold:
            self.send_alert_email("CPU 使用率", cpu_usage, self.cpu_threshold)

        load_avg = self._get_json("load").get('min1', 0)
        if load_avg > self.load_threshold:
            self.send_alert_email("系统负载1分钟", load_avg, self.load_threshold)

    def _check_disks_and_processes(self):
        """Compare disk I/O, filesystem usage and process count with thresholds."""
        max_io_usage = 0
        for disk in self._get_json("diskio"):
            transferred = disk.get('read_bytes', 0) + disk.get('write_bytes', 0)
            max_io_usage = max(max_io_usage, transferred / self._IO_DIVISOR)
        if max_io_usage > self.io_threshold:
            self.send_alert_email("磁盘 I/OMB/s", max_io_usage, self.io_threshold)

        for fs in self._get_json("fs"):
            disk_usage = fs.get('percent', 0)
            if disk_usage > self.disk_usage_threshold:
                self.send_alert_email(f"磁盘占用 ({fs.get('mnt_point')})", disk_usage,
                                      self.disk_usage_threshold)

        # NOTE(review): the value compared with handle_threshold is the
        # 'total' of /processcount, presumably a process count rather than a
        # handle count -- confirm the intended metric.
        handle_count = self._get_json("processcount").get('total', 0)
        if handle_count > self.handle_threshold:
            self.send_alert_email("句柄数", handle_count, self.handle_threshold)

    def monitor(self):
        """Poll the Glances API until ``stop()`` is called, raising alerts.

        CPU and load are checked every round. Any failure is logged and
        followed by a fixed back-off before the next round.
        """
        while self._running:
            try:
                self._loop_index += 1
                self._check_cpu_and_load()
                # NOTE(review): the source was flattened, so the extent of
                # this branch is reconstructed; it is assumed to cover the
                # disk I/O, filesystem and process-count checks -- confirm.
                if self._loop_index % 6 == 0:
                    self._check_disks_and_processes()
                time.sleep(self.monitor_interval)
            except requests.RequestException as e:
                logger.error(f"连接 Glances API 失败: {e}")
                time.sleep(self._ERROR_BACKOFF_SECONDS)
            except Exception as e:
                logger.error(f"监控错误: {e}")
                time.sleep(self._ERROR_BACKOFF_SECONDS)

    def run(self):
        """Start Glances and the monitoring thread without blocking."""
        # Start the server first: if it raises, the running flag stays False.
        self.start_glances()
        self._running = True
        monitor_thread = threading.Thread(target=self.monitor, daemon=True)
        monitor_thread.start()

    def stop(self):
        """Stop the monitoring loop and the Glances child process."""
        self._running = False
        self.stop_glances()

View File

@@ -4,6 +4,7 @@ from loguru import logger
import os import os
import time import time
import subprocess import subprocess
import socket
from datetime import datetime from datetime import datetime
import platform import platform
import psutil import psutil
@@ -23,6 +24,16 @@ system_bp = Blueprint('system', __name__)
# 记录应用启动时间 # 记录应用启动时间
APP_START_TIME = time.time() APP_START_TIME = time.time()
# Most recent network-counter sample, used by the resource page to estimate
# upload/download rates. Sampling is deliberately page-driven and lightweight:
# 1. no extra daemon thread is started just to display a rate;
# 2. rates are only computed when a user refreshes or polls the resource
#    page, so the overhead is close to zero;
# 3. if the process restarts and this cache is lost, only the first reading
#    shows 0, which does not affect overall availability.
NETWORK_IO_SAMPLE = dict(timestamp=0.0, bytes_sent=0, bytes_recv=0)
def _system_config_path() -> str: def _system_config_path() -> str:
@@ -68,6 +79,178 @@ def _format_bytes_to_mb(value: int) -> float:
return round((_safe_float(value, 0.0) / 1024 / 1024), 2) return round((_safe_float(value, 0.0) / 1024 / 1024), 2)
def _safe_divide(numerator: float, denominator: float, default: float = 0.0) -> float:
    """Divide ``numerator`` by ``denominator`` without ever raising.

    Returns ``default`` when the denominator is falsy (zero, ``None``, ...)
    or when the division itself fails, so rate and ratio calculations are
    never interrupted by a division error.
    """
    try:
        quotient = numerator / denominator if denominator else default
    except Exception:
        return default
    return quotient
def _primary_disk_path() -> str:
    """Return the root path of the primary disk on the current system."""
    # The resource page must work both on a local Windows development machine
    # and on production Linux:
    # 1. the system root is preferred, which is "/" on Linux;
    # 2. on Windows the same expression resolves to the root of the current
    #    drive;
    # 3. this avoids hard-coding "/" and failing during local debugging.
    root_path = os.path.abspath(os.sep)
    return root_path
def _format_datetime_text(timestamp_value: float | int | None) -> str:
"""把时间戳格式化为后台页面可直接展示的文本。"""
if not timestamp_value:
return "-"
try:
return datetime.fromtimestamp(float(timestamp_value)).strftime("%Y-%m-%d %H:%M:%S")
except Exception:
return "-"
def _sample_network_speed() -> dict:
    """Estimate upload/download rates from two consecutive page samples.

    Reads the current psutil network counters, compares them with the sample
    stored in ``NETWORK_IO_SAMPLE`` and then overwrites that stored sample.
    Rates are 0 when no earlier sample exists and are never negative.

    Returns:
        dict: cumulative ``bytes_sent`` / ``bytes_recv`` plus
        ``upload_speed_bps`` / ``download_speed_bps`` rounded to 2 decimals.
    """
    counters = psutil.net_io_counters()
    now = time.time()
    current_sent = _safe_int(getattr(counters, "bytes_sent", 0))
    current_recv = _safe_int(getattr(counters, "bytes_recv", 0))

    last_timestamp = _safe_float(NETWORK_IO_SAMPLE.get("timestamp"))
    elapsed = max(now - last_timestamp, 0.0)

    def _rate(current_total, sample_key):
        # Bytes moved since the stored sample divided by the elapsed seconds;
        # a negative delta is clamped to zero.
        delta = current_total - _safe_int(NETWORK_IO_SAMPLE.get(sample_key))
        return max(_safe_divide(delta, elapsed, 0.0), 0.0)

    # A stored timestamp of 0 means there is no earlier sample to diff against.
    if elapsed > 0 and last_timestamp > 0:
        upload_speed = _rate(current_sent, "bytes_sent")
        download_speed = _rate(current_recv, "bytes_recv")
    else:
        upload_speed = 0.0
        download_speed = 0.0

    NETWORK_IO_SAMPLE["timestamp"] = now
    NETWORK_IO_SAMPLE["bytes_sent"] = current_sent
    NETWORK_IO_SAMPLE["bytes_recv"] = current_recv

    result = {
        "bytes_sent": current_sent,
        "bytes_recv": current_recv,
        "upload_speed_bps": round(upload_speed, 2),
        "download_speed_bps": round(download_speed, 2),
    }
    return result
# psutil.Process handles reused between snapshots, keyed by PID so that a
# forked worker never picks up the handle created by its parent.
_SNAPSHOT_PROCESS_HANDLES = {}


def _snapshot_process_handle():
    """Return a psutil.Process for the current PID, creating it only once.

    ``Process.cpu_percent(interval=None)`` measures CPU time since the
    previous call on the same object and returns 0.0 on its first call, so
    the handle has to outlive a single request for the reading to be useful.
    """
    pid = os.getpid()
    handle = _SNAPSHOT_PROCESS_HANDLES.get(pid)
    if handle is None:
        handle = psutil.Process(pid)
        _SNAPSHOT_PROCESS_HANDLES[pid] = handle
    return handle


def _snapshot_disk_items() -> list:
    """Collect usage details for every distinct mount point, fullest first."""
    disk_items = []
    seen_mountpoints = set()
    for partition in psutil.disk_partitions(all=False):
        mountpoint = str(getattr(partition, "mountpoint", "") or "").strip()
        if not mountpoint or mountpoint in seen_mountpoints:
            continue
        seen_mountpoints.add(mountpoint)
        try:
            usage = psutil.disk_usage(mountpoint)
        except Exception:
            # Best effort: a mount point that cannot be read is left out.
            continue
        disk_items.append({
            "device": str(getattr(partition, "device", "") or "").strip() or mountpoint,
            "mountpoint": mountpoint,
            "fstype": str(getattr(partition, "fstype", "") or "").strip(),
            "total_bytes": _safe_int(getattr(usage, "total", 0)),
            "used_bytes": _safe_int(getattr(usage, "used", 0)),
            "free_bytes": _safe_int(getattr(usage, "free", 0)),
            "usage_percent": round(_safe_float(getattr(usage, "percent", 0.0)), 1),
        })
    disk_items.sort(key=lambda item: item.get("usage_percent", 0.0), reverse=True)
    return disk_items


def _extract_server_runtime_snapshot() -> dict:
    """Build the lightweight server runtime snapshot for the resource page.

    The snapshot covers host resources (CPU, memory, disk, network), the
    current application process, and the MySQL / Redis summaries produced by
    the existing probes. It does not depend on a Glances process.

    Returns:
        dict: JSON-serialisable mapping with the keys ``timestamp``,
        ``server``, ``cpu``, ``memory``, ``disk``, ``network``, ``process``
        and ``infrastructure``.
    """
    server = current_app.dashboard_server
    # Reuse one Process handle across requests; a fresh handle would report
    # 0.0 for the process CPU usage on every single call.
    current_process = _snapshot_process_handle()
    virtual_memory = psutil.virtual_memory()
    swap_memory = psutil.swap_memory()
    # NOTE(review): with interval=None psutil compares against its previous
    # sample; recent psutil versions appear to keep that sample per calling
    # thread, so confirm the reading is meaningful under the deployed
    # worker/thread model.
    cpu_usage = psutil.cpu_percent(interval=None)
    process_cpu_usage = current_process.cpu_percent(interval=None)
    boot_time = psutil.boot_time()
    network_sample = _sample_network_speed()
    disk_io = psutil.disk_io_counters()

    try:
        load_values = os.getloadavg()
    except (AttributeError, OSError):
        # Load averages are not available on every platform.
        load_values = (0.0, 0.0, 0.0)

    disk_items = _snapshot_disk_items()

    try:
        primary_disk_usage = psutil.disk_usage(_primary_disk_path())
    except OSError:
        # Keep the rest of the snapshot usable; the getattr() defaults below
        # turn a missing reading into zeros.
        primary_disk_usage = None

    process_memory = current_process.memory_info()

    try:
        open_files = len(current_process.open_files())
    except Exception:
        open_files = 0

    try:
        # psutil 6 renamed Process.connections() to net_connections(); prefer
        # the new name when it exists and fall back for older releases.
        list_connections = getattr(current_process, "net_connections", None) or current_process.connections
        tcp_connections = list_connections(kind="inet")
        established_connections = sum(
            1 for conn in tcp_connections if str(getattr(conn, "status", "") or "").upper() == "ESTABLISHED"
        )
    except Exception:
        established_connections = 0

    return {
        "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
        "server": {
            "hostname": socket.gethostname(),
            "os": platform.system(),
            "os_version": platform.version(),
            "python_version": platform.python_version(),
            "boot_time": _format_datetime_text(boot_time),
            "uptime_seconds": round(max(time.time() - boot_time, 0), 2),
        },
        "cpu": {
            "usage_percent": round(cpu_usage, 1),
            "logical_count": psutil.cpu_count(logical=True) or 0,
            "physical_count": psutil.cpu_count(logical=False) or 0,
            "load_1": round(_safe_float(load_values[0]), 2),
            "load_5": round(_safe_float(load_values[1]), 2),
            "load_15": round(_safe_float(load_values[2]), 2),
        },
        "memory": {
            "usage_percent": round(_safe_float(getattr(virtual_memory, "percent", 0.0)), 1),
            "total_bytes": _safe_int(getattr(virtual_memory, "total", 0)),
            "used_bytes": _safe_int(getattr(virtual_memory, "used", 0)),
            "available_bytes": _safe_int(getattr(virtual_memory, "available", 0)),
            "swap_usage_percent": round(_safe_float(getattr(swap_memory, "percent", 0.0)), 1),
            "swap_total_bytes": _safe_int(getattr(swap_memory, "total", 0)),
            "swap_used_bytes": _safe_int(getattr(swap_memory, "used", 0)),
        },
        "disk": {
            "primary_usage_percent": round(_safe_float(getattr(primary_disk_usage, "percent", 0.0)), 1),
            "primary_total_bytes": _safe_int(getattr(primary_disk_usage, "total", 0)),
            "primary_used_bytes": _safe_int(getattr(primary_disk_usage, "used", 0)),
            "io_read_bytes": _safe_int(getattr(disk_io, "read_bytes", 0)) if disk_io else 0,
            "io_write_bytes": _safe_int(getattr(disk_io, "write_bytes", 0)) if disk_io else 0,
            # Only the eight fullest mount points are sent to the page.
            "items": disk_items[:8],
        },
        "network": {
            **network_sample,
            "established_connections": established_connections,
        },
        "process": {
            "pid": current_process.pid,
            "cpu_percent": round(process_cpu_usage, 1),
            "memory_percent": round(current_process.memory_percent(), 2),
            "memory_rss_bytes": _safe_int(getattr(process_memory, "rss", 0)),
            "thread_count": current_process.num_threads(),
            "open_files": open_files,
            "create_time": _format_datetime_text(current_process.create_time()),
            "uptime_seconds": round(max(time.time() - current_process.create_time(), 0), 2),
        },
        "infrastructure": {
            "mysql": _extract_mysql_runtime_snapshot(server.db_manager),
            "redis": _extract_redis_runtime_snapshot(server.db_manager),
        },
    }
def _extract_mysql_runtime_snapshot(db_manager) -> dict: def _extract_mysql_runtime_snapshot(db_manager) -> dict:
"""采集 MySQL 运行态摘要。 """采集 MySQL 运行态摘要。
@@ -761,17 +944,11 @@ def api_docs():
@system_bp.route('/system_status') @system_bp.route('/system_status')
@login_required @login_required
def system_status(): def system_status():
src = request.args.get('src') # 资源监控页改为项目内置轻量面板:
if not src: # 1. 不再依赖 glances 独立进程;
try: # 2. 页面只消费当前服务自身的 API
server = current_app.dashboard_server # 3. 线上部署时不用额外开放 61208 之类的端口。
glances = getattr(server.robot, "config").glances if hasattr(server.robot, "config") else {} return render_template('system_status.html')
host = glances.get("host", "127.0.0.1")
port = glances.get("port", 61208)
src = f"http://{host}:{port}/"
except Exception:
src = "http://127.0.0.1:61208/"
return render_template('system_status.html', src_url=src)
@system_bp.route('/system_llm') @system_bp.route('/system_llm')
@@ -811,6 +988,20 @@ def api_system_info():
return jsonify({"success": False, "error": str(e)}), 500 return jsonify({"success": False, "error": str(e)}), 500
@system_bp.route('/api/system_status_overview')
@login_required
def api_system_status_overview():
    """Return the lightweight server snapshot used by the resource page.

    Responds with ``{"success": True, "data": <snapshot>}`` on success. On
    any failure it responds with HTTP 500 and
    ``{"success": False, "error": <text>, "message": <text>}``.
    """
    try:
        return jsonify({
            "success": True,
            "data": _extract_server_runtime_snapshot(),
        })
    except Exception as e:
        # logger.exception records the traceback along with the message.
        logger.exception(f"获取资源监控快照失败: {e}")
        detail = str(e)
        # "error" is kept for existing consumers; "message" is the key the
        # resource page reads when it displays a failure.
        return jsonify({"success": False, "error": detail, "message": detail}), 500
@system_bp.route('/api/system_health_summary') @system_bp.route('/api/system_health_summary')
@login_required @login_required
def api_system_health_summary(): def api_system_health_summary():

View File

@@ -6,9 +6,23 @@
<div class="page-shell system-page"> <div class="page-shell system-page">
<div class="page-hero"> <div class="page-hero">
<div class="page-hero-copy"> <div class="page-hero-copy">
<div class="page-eyebrow">System Workspace</div> <div class="page-eyebrow">Resource Overview</div>
<h1>资源监控</h1> <h1>资源监控</h1>
<p>直接在后台查看系统资源变化与运行状态,保持监控入口简洁清晰</p> <p>直接观察 ABOT 所在服务器的关键资源、应用进程和基础设施状态,不再依赖额外的 glances 进程</p>
<div class="hero-meta-row">
<div class="hero-meta-pill">
<span class="hero-meta-label">最近刷新</span>
<span class="hero-meta-value">{% raw %}{{ statusOverview.timestamp || '-' }}{% endraw %}</span>
</div>
<div class="hero-meta-pill" v-if="statusOverview.server">
<span class="hero-meta-label">主机</span>
<span class="hero-meta-value">{% raw %}{{ statusOverview.server.hostname || '-' }}{% endraw %}</span>
</div>
<div class="hero-meta-pill" v-if="statusOverview.process">
<span class="hero-meta-label">PID</span>
<span class="hero-meta-value">{% raw %}{{ statusOverview.process.pid || '-' }}{% endraw %}</span>
</div>
</div>
<div class="md2img-health-inline" v-loading="md2imgLoading"> <div class="md2img-health-inline" v-loading="md2imgLoading">
<span class="health-title">转图运行时</span> <span class="health-title">转图运行时</span>
<el-tag size="mini" :type="runtimeTagType">{% raw %}{{ runtimeTagText }}{% endraw %}</el-tag> <el-tag size="mini" :type="runtimeTagType">{% raw %}{{ runtimeTagText }}{% endraw %}</el-tag>
@@ -23,27 +37,221 @@
<el-button type="success" plain :loading="md2imgWarming" @click="warmupMd2Img"> <el-button type="success" plain :loading="md2imgWarming" @click="warmupMd2Img">
<i class="el-icon-magic-stick"></i> 预热转图 <i class="el-icon-magic-stick"></i> 预热转图
</el-button> </el-button>
<el-button type="info" plain :loading="md2imgLoading" @click="loadMd2ImgHealth"> <el-button type="info" plain :loading="loading" @click="loadStatusOverview(true)">
<i class="el-icon-refresh"></i> 刷新转图状态 <i class="el-icon-refresh"></i> 刷新监控
</el-button>
<el-button type="danger" @click="confirmRestart">
<i class="el-icon-refresh-left"></i> 重启服务
</el-button> </el-button>
<el-button type="primary" plain @click="reloadIframe"><i class="el-icon-refresh"></i> 刷新面板</el-button>
<el-button type="primary" @click="openInNewTab"><i class="el-icon-top-right"></i> 新窗口打开</el-button>
<el-button type="danger" @click="confirmRestart"><i class="el-icon-refresh-left"></i> 重启服务</el-button>
</div> </div>
</div> </div>
<el-card class="iframe-shell-card" shadow="hover"> <div class="stats-grid">
<div slot="header" class="workspace-header"> <el-card class="stat-card" shadow="hover">
<div> <div class="stat-card__label">CPU 使用率</div>
<h3>监控面板</h3> <div class="stat-card__value">{% raw %}{{ formatPercent(cpu.usage_percent) }}{% endraw %}</div>
<p>直接在控制台内查看系统资源变化与运行状态。</p> <div class="stat-card__meta">{% raw %}{{ cpu.logical_count || 0 }}{% endraw %} 线程 · load1 {% raw %}{{ formatNumber(cpu.load_1, 2) }}{% endraw %}</div>
<el-progress :percentage="normalizePercent(cpu.usage_percent)" :stroke-width="10" :status="progressStatus(cpu.usage_percent, 65, 85)"></el-progress>
</el-card>
<el-card class="stat-card" shadow="hover">
<div class="stat-card__label">内存使用率</div>
<div class="stat-card__value">{% raw %}{{ formatPercent(memory.usage_percent) }}{% endraw %}</div>
<div class="stat-card__meta">{% raw %}{{ formatBytes(memory.used_bytes) }}{% endraw %} / {% raw %}{{ formatBytes(memory.total_bytes) }}{% endraw %}</div>
<el-progress :percentage="normalizePercent(memory.usage_percent)" :stroke-width="10" :status="progressStatus(memory.usage_percent, 70, 88)"></el-progress>
</el-card>
<el-card class="stat-card" shadow="hover">
<div class="stat-card__label">主盘使用率</div>
<div class="stat-card__value">{% raw %}{{ formatPercent(disk.primary_usage_percent) }}{% endraw %}</div>
<div class="stat-card__meta">{% raw %}{{ formatBytes(disk.primary_used_bytes) }}{% endraw %} / {% raw %}{{ formatBytes(disk.primary_total_bytes) }}{% endraw %}</div>
<el-progress :percentage="normalizePercent(disk.primary_usage_percent)" :stroke-width="10" :status="progressStatus(disk.primary_usage_percent, 75, 90)"></el-progress>
</el-card>
<el-card class="stat-card" shadow="hover">
<div class="stat-card__label">网络速率</div>
<div class="stat-card__value">{% raw %}{{ formatSpeed(network.download_speed_bps) }}{% endraw %}</div>
<div class="stat-card__meta">上行 {% raw %}{{ formatSpeed(network.upload_speed_bps) }}{% endraw %} · 连接 {% raw %}{{ network.established_connections || 0 }}{% endraw %}</div>
<div class="network-balance">
<span>累计上行 {% raw %}{{ formatBytes(network.bytes_sent) }}{% endraw %}</span>
<span>累计下行 {% raw %}{{ formatBytes(network.bytes_recv) }}{% endraw %}</span>
</div> </div>
<div class="iframe-url">{{ src_url }}</div> </el-card>
</div> </div>
<div class="iframe-shell">
<iframe ref="monitorFrame" src="{{ src_url }}" frameborder="0"></iframe> <div class="monitor-grid">
</div> <el-card class="workspace-card" shadow="hover">
</el-card> <div slot="header" class="workspace-header">
<div>
<h3>系统概览</h3>
<p>观察主机、Python 运行环境和负载情况,快速判断是不是机器层面的问题。</p>
</div>
</div>
<div class="detail-grid">
<div class="detail-item">
<span class="detail-item__label">主机名</span>
<span class="detail-item__value">{% raw %}{{ server.hostname || '-' }}{% endraw %}</span>
</div>
<div class="detail-item">
<span class="detail-item__label">操作系统</span>
<span class="detail-item__value">{% raw %}{{ server.os || '-' }}{% endraw %}</span>
</div>
<div class="detail-item detail-item--wide">
<span class="detail-item__label">系统版本</span>
<span class="detail-item__value">{% raw %}{{ server.os_version || '-' }}{% endraw %}</span>
</div>
<div class="detail-item">
<span class="detail-item__label">Python</span>
<span class="detail-item__value">{% raw %}{{ server.python_version || '-' }}{% endraw %}</span>
</div>
<div class="detail-item">
<span class="detail-item__label">开机时间</span>
<span class="detail-item__value">{% raw %}{{ server.boot_time || '-' }}{% endraw %}</span>
</div>
<div class="detail-item">
<span class="detail-item__label">系统运行时长</span>
<span class="detail-item__value">{% raw %}{{ formatDuration(server.uptime_seconds) }}{% endraw %}</span>
</div>
<div class="detail-item">
<span class="detail-item__label">Load 1 / 5 / 15</span>
<span class="detail-item__value">{% raw %}{{ formatNumber(cpu.load_1, 2) }} / {{ formatNumber(cpu.load_5, 2) }} / {{ formatNumber(cpu.load_15, 2) }}{% endraw %}</span>
</div>
<div class="detail-item">
<span class="detail-item__label">CPU 核心</span>
<span class="detail-item__value">{% raw %}{{ cpu.physical_count || 0 }}{% endraw %} 物理 / {% raw %}{{ cpu.logical_count || 0 }}{% endraw %} 逻辑</span>
</div>
<div class="detail-item">
<span class="detail-item__label">Swap 使用率</span>
<span class="detail-item__value">{% raw %}{{ formatPercent(memory.swap_usage_percent) }}{% endraw %}</span>
</div>
</div>
</el-card>
<el-card class="workspace-card" shadow="hover">
<div slot="header" class="workspace-header">
<div>
<h3>ABOT 进程</h3>
<p>确认当前应用自身的资源占用,避免只看主机而忽略进程级热点。</p>
</div>
</div>
<div class="detail-grid">
<div class="detail-item">
<span class="detail-item__label">PID</span>
<span class="detail-item__value">{% raw %}{{ process.pid || '-' }}{% endraw %}</span>
</div>
<div class="detail-item">
<span class="detail-item__label">进程 CPU</span>
<span class="detail-item__value">{% raw %}{{ formatPercent(process.cpu_percent) }}{% endraw %}</span>
</div>
<div class="detail-item">
<span class="detail-item__label">进程内存</span>
<span class="detail-item__value">{% raw %}{{ formatBytes(process.memory_rss_bytes) }}{% endraw %}</span>
</div>
<div class="detail-item">
<span class="detail-item__label">内存占比</span>
<span class="detail-item__value">{% raw %}{{ formatPercent(process.memory_percent) }}{% endraw %}</span>
</div>
<div class="detail-item">
<span class="detail-item__label">线程数</span>
<span class="detail-item__value">{% raw %}{{ process.thread_count || 0 }}{% endraw %}</span>
</div>
<div class="detail-item">
<span class="detail-item__label">打开文件</span>
<span class="detail-item__value">{% raw %}{{ process.open_files || 0 }}{% endraw %}</span>
</div>
<div class="detail-item detail-item--wide">
<span class="detail-item__label">进程启动时间</span>
<span class="detail-item__value">{% raw %}{{ process.create_time || '-' }}{% endraw %}</span>
</div>
<div class="detail-item detail-item--wide">
<span class="detail-item__label">进程运行时长</span>
<span class="detail-item__value">{% raw %}{{ formatDuration(process.uptime_seconds) }}{% endraw %}</span>
</div>
</div>
</el-card>
<el-card class="workspace-card workspace-card--wide" shadow="hover">
<div slot="header" class="workspace-header">
<div>
<h3>基础设施运行态</h3>
<p>把数据库和缓存的关键摘要放在同一屏里,日常看状态不需要再跳出去。</p>
</div>
</div>
<div class="infra-grid">
<div class="infra-panel" :class="`infra-panel--${infrastructure.mysql.status || 'warning'}`">
<div class="infra-panel__head">
<div>
<div class="infra-panel__title">MySQL</div>
<div class="infra-panel__summary">{% raw %}{{ infrastructure.mysql.summary || '暂无状态' }}{% endraw %}</div>
</div>
<el-tag size="mini" :type="tagType(infrastructure.mysql.status)">{% raw %}{{ statusText(infrastructure.mysql.status) }}{% endraw %}</el-tag>
</div>
<div class="infra-metrics">
<div class="infra-metric"><span>数据库</span><strong>{% raw %}{{ infrastructure.mysql.database || '-' }}{% endraw %}</strong></div>
<div class="infra-metric"><span>版本</span><strong>{% raw %}{{ infrastructure.mysql.version || '-' }}{% endraw %}</strong></div>
<div class="infra-metric"><span>连接数</span><strong>{% raw %}{{ infrastructure.mysql.threads_connected || 0 }}{% endraw %} / {% raw %}{{ infrastructure.mysql.max_connections || '-' }}{% endraw %}</strong></div>
<div class="infra-metric"><span>连接负载</span><strong>{% raw %}{{ formatPercent(infrastructure.mysql.connection_usage_percent) }}{% endraw %}</strong></div>
<div class="infra-metric"><span>运行线程</span><strong>{% raw %}{{ infrastructure.mysql.threads_running || 0 }}{% endraw %}</strong></div>
<div class="infra-metric"><span>QPS</span><strong>{% raw %}{{ formatNumber(infrastructure.mysql.questions_per_second, 2) }}{% endraw %}</strong></div>
<div class="infra-metric"><span>表数量</span><strong>{% raw %}{{ infrastructure.mysql.table_count || 0 }}{% endraw %}</strong></div>
<div class="infra-metric"><span>库体量</span><strong>{% raw %}{{ formatMb(infrastructure.mysql.schema_size_mb) }}{% endraw %}</strong></div>
</div>
</div>
<div class="infra-panel" :class="`infra-panel--${infrastructure.redis.status || 'warning'}`">
<div class="infra-panel__head">
<div>
<div class="infra-panel__title">Redis</div>
<div class="infra-panel__summary">{% raw %}{{ infrastructure.redis.summary || '暂无状态' }}{% endraw %}</div>
</div>
<el-tag size="mini" :type="tagType(infrastructure.redis.status)">{% raw %}{{ statusText(infrastructure.redis.status) }}{% endraw %}</el-tag>
</div>
<div class="infra-metrics">
<div class="infra-metric"><span>DB</span><strong>{% raw %}{{ infrastructure.redis.db_index || 0 }}{% endraw %}</strong></div>
<div class="infra-metric"><span>Key 数量</span><strong>{% raw %}{{ infrastructure.redis.key_count || 0 }}{% endraw %}</strong></div>
<div class="infra-metric"><span>客户端</span><strong>{% raw %}{{ infrastructure.redis.connected_clients || 0 }}{% endraw %}</strong></div>
<div class="infra-metric"><span>阻塞客户端</span><strong>{% raw %}{{ infrastructure.redis.blocked_clients || 0 }}{% endraw %}</strong></div>
<div class="infra-metric"><span>OPS/s</span><strong>{% raw %}{{ infrastructure.redis.ops_per_sec || 0 }}{% endraw %}</strong></div>
<div class="infra-metric"><span>命中率</span><strong>{% raw %}{{ formatPercent(infrastructure.redis.hit_rate_percent) }}{% endraw %}</strong></div>
<div class="infra-metric"><span>已用内存</span><strong>{% raw %}{{ infrastructure.redis.used_memory_human || '-' }}{% endraw %}</strong></div>
<div class="infra-metric"><span>峰值内存</span><strong>{% raw %}{{ infrastructure.redis.used_memory_peak_human || '-' }}{% endraw %}</strong></div>
</div>
</div>
</div>
</el-card>
<el-card class="workspace-card workspace-card--wide" shadow="hover">
<div slot="header" class="workspace-header">
<div>
<h3>磁盘挂载点</h3>
<p>按使用率排序展示常用挂载点,方便快速发现哪个分区快满了。</p>
</div>
</div>
<el-table :data="disk.items || []" size="mini" style="width: 100%">
<el-table-column prop="mountpoint" label="挂载点" min-width="180"></el-table-column>
<el-table-column prop="device" label="设备" min-width="180"></el-table-column>
<el-table-column prop="fstype" label="文件系统" width="110"></el-table-column>
<el-table-column label="已用 / 总量" min-width="170">
<template slot-scope="scope">
{% raw %}{{ formatBytes(scope.row.used_bytes) }} / {{ formatBytes(scope.row.total_bytes) }}{% endraw %}
</template>
</el-table-column>
<el-table-column label="使用率" min-width="180">
<template slot-scope="scope">
<div class="table-progress-cell">
<span>{% raw %}{{ formatPercent(scope.row.usage_percent) }}{% endraw %}</span>
<el-progress
:percentage="normalizePercent(scope.row.usage_percent)"
:stroke-width="8"
:status="progressStatus(scope.row.usage_percent, 75, 90)">
</el-progress>
</div>
</template>
</el-table-column>
<el-table-column label="剩余空间" min-width="120">
<template slot-scope="scope">
{% raw %}{{ formatBytes(scope.row.free_bytes) }}{% endraw %}
</template>
</el-table-column>
</el-table>
</el-card>
</div>
</div> </div>
{% endblock %} {% endblock %}
@@ -55,14 +263,49 @@
data() { data() {
return { return {
currentView: '14', currentView: '14',
frameUrl: '{{ src_url }}',
restarting: false, restarting: false,
loading: false,
md2imgLoading: false, md2imgLoading: false,
md2imgWarming: false, md2imgWarming: false,
md2imgHealth: null autoRefreshTimer: null,
md2imgHealth: null,
statusOverview: {
timestamp: '',
server: {},
cpu: {},
memory: {},
disk: { items: [] },
network: {},
process: {},
infrastructure: {
mysql: {},
redis: {}
}
}
} }
}, },
computed: { computed: {
server() {
return this.statusOverview.server || {};
},
cpu() {
return this.statusOverview.cpu || {};
},
memory() {
return this.statusOverview.memory || {};
},
disk() {
return this.statusOverview.disk || { items: [] };
},
network() {
return this.statusOverview.network || {};
},
process() {
return this.statusOverview.process || {};
},
infrastructure() {
return this.statusOverview.infrastructure || { mysql: {}, redis: {} };
},
runtimeTagText() { runtimeTagText() {
const runtime = this.md2imgHealth && this.md2imgHealth.runtime ? this.md2imgHealth.runtime : {}; const runtime = this.md2imgHealth && this.md2imgHealth.runtime ? this.md2imgHealth.runtime : {};
return runtime.loop_running ? '运行时在线' : '运行时未就绪'; return runtime.loop_running ? '运行时在线' : '运行时未就绪';
@@ -73,7 +316,7 @@
}, },
browserTagText() { browserTagText() {
const browser = this.md2imgHealth && this.md2imgHealth.browser ? this.md2imgHealth.browser : {}; const browser = this.md2imgHealth && this.md2imgHealth.browser ? this.md2imgHealth.browser : {};
return browser.connected ? '浏览器已连接' : '浏览器未连接'; return browser.connected ? '浏览器已连接' : 'info';
}, },
browserTagType() { browserTagType() {
const browser = this.md2imgHealth && this.md2imgHealth.browser ? this.md2imgHealth.browser : {}; const browser = this.md2imgHealth && this.md2imgHealth.browser ? this.md2imgHealth.browser : {};
@@ -91,16 +334,106 @@
}, },
mounted() { mounted() {
this.currentView = '14'; this.currentView = '14';
this.loadStatusOverview(false);
this.loadMd2ImgHealth(); this.loadMd2ImgHealth();
// 资源页采用前端轮询即可满足日常观察,不需要为了监控再起单独守护进程。
this.autoRefreshTimer = setInterval(() => {
this.loadStatusOverview(false);
}, 10000);
},
beforeDestroy() {
if (this.autoRefreshTimer) {
clearInterval(this.autoRefreshTimer);
this.autoRefreshTimer = null;
}
}, },
methods: { methods: {
reloadIframe() { async loadStatusOverview(showToast) {
if (this.$refs.monitorFrame) { if (this.loading) return;
this.$refs.monitorFrame.src = this.frameUrl; this.loading = true;
try {
const response = await axios.get('/api/system_status_overview');
if (response.data && response.data.success) {
this.statusOverview = response.data.data || this.statusOverview;
if (showToast) {
this.$message.success('资源监控已刷新');
}
} else {
this.$message.error(response.data?.message || '获取资源监控失败');
}
} catch (error) {
this.$message.error(error.response?.data?.message || '获取资源监控失败');
} finally {
this.loading = false;
} }
}, },
openInNewTab() { normalizePercent(value) {
window.open(this.frameUrl, '_blank'); const number = Number(value || 0);
if (Number.isNaN(number)) return 0;
return Math.max(0, Math.min(100, Number(number.toFixed(1))));
},
progressStatus(value, warningThreshold, dangerThreshold) {
const percent = this.normalizePercent(value);
if (percent >= dangerThreshold) return 'exception';
if (percent >= warningThreshold) return 'warning';
return 'success';
},
formatNumber(value, digits = 0) {
if (value === null || value === undefined || value === '') return '-';
const number = Number(value);
if (Number.isNaN(number)) return String(value);
return number.toFixed(digits);
},
formatPercent(value) {
if (value === null || value === undefined || value === '') return '-';
return `${this.formatNumber(value, 1)}%`;
},
formatBytes(value) {
const bytes = Number(value || 0);
if (!Number.isFinite(bytes) || bytes <= 0) return '0 B';
const units = ['B', 'KB', 'MB', 'GB', 'TB'];
let size = bytes;
let index = 0;
while (size >= 1024 && index < units.length - 1) {
size /= 1024;
index += 1;
}
const digits = index === 0 ? 0 : (size >= 100 ? 0 : (size >= 10 ? 1 : 2));
return `${size.toFixed(digits)} ${units[index]}`;
},
formatSpeed(value) {
return `${this.formatBytes(value)}/s`;
},
formatDuration(seconds) {
const total = parseInt(seconds || 0, 10);
if (!total || total <= 0) return '-';
const days = Math.floor(total / 86400);
const hours = Math.floor((total % 86400) / 3600);
const minutes = Math.floor((total % 3600) / 60);
if (days > 0) return `${days}${hours}小时`;
if (hours > 0) return `${hours}小时 ${minutes}分钟`;
return `${minutes}分钟`;
},
formatMb(value) {
const number = Number(value || 0);
if (!Number.isFinite(number) || number <= 0) return '0 MB';
return `${number.toFixed(number >= 100 ? 0 : 1)} MB`;
},
statusText(status) {
const mapping = {
healthy: '健康',
warning: '关注',
danger: '异常'
};
return mapping[status] || '未知';
},
tagType(status) {
const mapping = {
healthy: 'success',
warning: 'warning',
danger: 'danger'
};
return mapping[status] || 'info';
}, },
confirmRestart() { confirmRestart() {
this.$confirm('确认执行 ./restart.sh 重启服务吗?这会中断当前服务几秒钟。', '重启确认', { this.$confirm('确认执行 ./restart.sh 重启服务吗?这会中断当前服务几秒钟。', '重启确认', {
@@ -131,7 +464,6 @@
if (this.md2imgLoading) return; if (this.md2imgLoading) return;
this.md2imgLoading = true; this.md2imgLoading = true;
try { try {
// 默认不强制拉起 runtime避免纯查看状态时引入副作用。
const response = await axios.get('/api/system/md2img_health'); const response = await axios.get('/api/system/md2img_health');
if (response.data && response.data.success) { if (response.data && response.data.success) {
this.md2imgHealth = response.data.data || null; this.md2imgHealth = response.data.data || null;
@@ -170,44 +502,79 @@
.page-shell { display: flex; flex-direction: column; gap: 16px; } .page-shell { display: flex; flex-direction: column; gap: 16px; }
.page-hero { .page-hero {
display: flex; align-items: flex-end; justify-content: space-between; gap: 18px; padding: 24px 26px; border-radius: 24px; display: flex; align-items: flex-end; justify-content: space-between; gap: 18px; padding: 24px 26px; border-radius: 24px;
background: linear-gradient(135deg, rgba(79,70,229,0.10), rgba(59,130,246,0.08), rgba(255,255,255,0.9)); background: linear-gradient(135deg, rgba(15,23,42,0.05), rgba(59,130,246,0.10), rgba(255,255,255,0.95));
border: 1px solid rgba(148, 163, 184, 0.16); box-shadow: 0 18px 40px rgba(15, 23, 42, 0.06); border: 1px solid rgba(148, 163, 184, 0.16); box-shadow: 0 18px 40px rgba(15, 23, 42, 0.06);
} }
.page-hero-actions { display: flex; align-items: center; gap: 12px; } .page-hero-actions { display: flex; align-items: center; gap: 12px; flex-wrap: wrap; }
.page-eyebrow { font-size: 12px; text-transform: uppercase; letter-spacing: .08em; color: #6366f1; font-weight: 700; margin-bottom: 8px; } .page-eyebrow { font-size: 12px; text-transform: uppercase; letter-spacing: .08em; color: #2563eb; font-weight: 700; margin-bottom: 8px; }
.page-hero-copy h1 { font-size: 30px; line-height: 1.1; margin-bottom: 10px; color: #0f172a; } .page-hero-copy h1 { font-size: 30px; line-height: 1.1; margin-bottom: 10px; color: #0f172a; }
.page-hero-copy p { color: #64748b; font-size: 14px; } .page-hero-copy p { color: #64748b; font-size: 14px; }
.hero-meta-row { display: flex; gap: 10px; flex-wrap: wrap; margin-top: 14px; }
.hero-meta-pill {
display: inline-flex; align-items: center; gap: 8px; padding: 8px 12px; border-radius: 999px;
background: rgba(255,255,255,0.8); border: 1px solid rgba(148,163,184,0.14); color: #475569; font-size: 12px;
}
.hero-meta-label { color: #94a3b8; }
.hero-meta-value { color: #0f172a; font-weight: 700; }
.md2img-health-inline { .md2img-health-inline {
margin-top: 12px; margin-top: 12px; display: flex; align-items: center; gap: 8px; flex-wrap: wrap; padding: 8px 10px;
display: flex; border-radius: 12px; background: rgba(255,255,255,0.72); border: 1px solid rgba(148, 163, 184, 0.18);
align-items: center;
gap: 8px;
flex-wrap: wrap;
padding: 8px 10px;
border-radius: 12px;
background: rgba(255,255,255,0.72);
border: 1px solid rgba(148, 163, 184, 0.18);
} }
.health-title { font-size: 12px; font-weight: 700; color: #334155; } .health-title { font-size: 12px; font-weight: 700; color: #334155; }
.health-brief { font-size: 12px; color: #475569; } .health-brief { font-size: 12px; color: #475569; }
.health-time { font-size: 12px; color: #94a3b8; } .health-time { font-size: 12px; color: #94a3b8; }
.stats-grid { display: grid; grid-template-columns: repeat(4, minmax(0, 1fr)); gap: 16px; }
.stat-card { border-radius: 20px; }
.stat-card__label { color: #64748b; font-size: 13px; margin-bottom: 10px; }
.stat-card__value { color: #0f172a; font-size: 30px; font-weight: 700; line-height: 1; margin-bottom: 10px; }
.stat-card__meta { color: #475569; font-size: 12px; margin-bottom: 12px; min-height: 18px; }
.network-balance { margin-top: 12px; display: flex; justify-content: space-between; gap: 10px; color: #64748b; font-size: 12px; }
.monitor-grid { display: grid; grid-template-columns: repeat(2, minmax(0, 1fr)); gap: 16px; }
.workspace-card { border-radius: 20px; }
.workspace-card--wide { grid-column: 1 / -1; }
.workspace-header { display: flex; align-items: center; justify-content: space-between; gap: 16px; } .workspace-header { display: flex; align-items: center; justify-content: space-between; gap: 16px; }
.workspace-header h3 { font-size: 18px; margin-bottom: 4px; } .workspace-header h3 { font-size: 18px; margin-bottom: 4px; }
.workspace-header p { font-size: 13px; color: #64748b; } .workspace-header p { font-size: 13px; color: #64748b; }
.iframe-url { .detail-grid { display: grid; grid-template-columns: repeat(2, minmax(0, 1fr)); gap: 12px; }
max-width: 40%; overflow: hidden; text-overflow: ellipsis; white-space: nowrap; font-size: 12px; .detail-item {
color: #94a3b8; padding: 8px 12px; border-radius: 999px; background: rgba(248,250,252,0.9); border: 1px solid rgba(148,163,184,0.12); display: flex; flex-direction: column; gap: 6px; padding: 14px 16px; border-radius: 16px;
background: rgba(248,250,252,0.82); border: 1px solid rgba(148,163,184,0.12);
}
.detail-item--wide { grid-column: 1 / -1; }
.detail-item__label { color: #64748b; font-size: 12px; }
.detail-item__value { color: #0f172a; font-size: 15px; font-weight: 700; word-break: break-all; }
.infra-grid { display: grid; grid-template-columns: repeat(2, minmax(0, 1fr)); gap: 14px; }
.infra-panel {
padding: 16px; border-radius: 18px; border: 1px solid rgba(148,163,184,0.12); background: rgba(248,250,252,0.86);
}
.infra-panel--healthy { box-shadow: inset 0 0 0 1px rgba(16,185,129,0.08); }
.infra-panel--warning { box-shadow: inset 0 0 0 1px rgba(245,158,11,0.10); }
.infra-panel--danger { box-shadow: inset 0 0 0 1px rgba(239,68,68,0.10); }
.infra-panel__head { display: flex; align-items: flex-start; justify-content: space-between; gap: 12px; margin-bottom: 14px; }
.infra-panel__title { color: #0f172a; font-size: 16px; font-weight: 700; }
.infra-panel__summary { color: #64748b; font-size: 12px; line-height: 1.6; margin-top: 4px; }
.infra-metrics { display: grid; grid-template-columns: repeat(2, minmax(0, 1fr)); gap: 10px 12px; }
.infra-metric {
display: flex; flex-direction: column; gap: 4px; padding: 10px 12px; border-radius: 14px;
background: rgba(255,255,255,0.75); border: 1px solid rgba(148,163,184,0.10);
}
.infra-metric span { color: #64748b; font-size: 12px; }
.infra-metric strong { color: #0f172a; font-size: 14px; font-weight: 700; word-break: break-word; }
.table-progress-cell { display: grid; grid-template-columns: 60px 1fr; align-items: center; gap: 10px; }
@media (max-width: 1200px) {
.stats-grid { grid-template-columns: repeat(2, minmax(0, 1fr)); }
.monitor-grid { grid-template-columns: 1fr; }
.infra-grid { grid-template-columns: 1fr; }
} }
.iframe-shell-card { height: calc(100vh - 230px); }
.iframe-shell-card .el-card__body { height: calc(100% - 73px); }
.iframe-shell { height: 100%; border-radius: 18px; overflow: hidden; border: 1px solid rgba(148,163,184,0.12); background: rgba(248,250,252,0.82); }
.iframe-shell iframe { width: 100%; height: 100%; border: none; display: block; background: #fff; }
@media (max-width: 960px) { @media (max-width: 960px) {
.page-hero { flex-direction: column; align-items: flex-start; } .page-hero { flex-direction: column; align-items: flex-start; }
.workspace-header { flex-direction: column; align-items: flex-start; } .detail-grid { grid-template-columns: 1fr; }
.page-hero-actions { flex-wrap: wrap; } .detail-item--wide { grid-column: auto; }
.iframe-url { max-width: 100%; } }
.md2img-health-inline { width: 100%; } @media (max-width: 640px) {
.stats-grid { grid-template-columns: 1fr; }
.network-balance { flex-direction: column; }
.infra-metrics { grid-template-columns: 1fr; }
} }
</style> </style>
{% endblock %} {% endblock %}

18
main.py
View File

@@ -3,7 +3,6 @@
import asyncio import asyncio
import threading import threading
from admin.GlancesMonitor import GlancesMonitor
from utils.decorator.async_job import async_job from utils.decorator.async_job import async_job
from utils.markdown_to_image import warmup_md2img_browser from utils.markdown_to_image import warmup_md2img_browser
from configuration import Config from configuration import Config
@@ -126,23 +125,6 @@ def main():
robot.LOG.info(f"Dashboard服务器已在 http://{dashboard_server.host}:{dashboard_server.port} 启动") robot.LOG.info(f"Dashboard服务器已在 http://{dashboard_server.host}:{dashboard_server.port} 启动")
except Exception as e: except Exception as e:
robot.LOG.error(f"Dashboard服务器启动失败: {e}") robot.LOG.error(f"Dashboard服务器启动失败: {e}")
try:
robot.LOG.debug(f"开始启动GlancesMonitor")
# 初始化 Glances 监控
monitor = GlancesMonitor(
email_sender=robot.email_sender,
host=config.glances.get("host"),
port=config.glances.get("port"),
cpu_threshold=80.0,
load_threshold=16, # 自动设为 CPU 核心数 * 2
io_threshold=100.0,
disk_usage_threshold=70.0,
handle_threshold=20000,
recipient=config.email.get("alert_recipient")
)
monitor.run()
except Exception as e:
robot.LOG.error(f"GlancesMonitor服务器启动失败: {e}")
# 启动后在“调度器同一事件循环”中预热 Markdown 转图浏览器。 # 启动后在“调度器同一事件循环”中预热 Markdown 转图浏览器。
# 这样可确保预热得到的常驻浏览器与后续截图任务复用同一 loop避免跨 loop 句柄失效。 # 这样可确保预热得到的常驻浏览器与后续截图任务复用同一 loop避免跨 loop 句柄失效。