服务器监控

This commit is contained in:
liuwei
2025-05-27 09:06:21 +08:00
parent 108735fdad
commit 60b6b9f491
6 changed files with 234 additions and 1 deletions

149
admin/GlancesMonitor.py Normal file
View File

@@ -0,0 +1,149 @@
import time
import threading
import subprocess
import requests
class GlancesMonitor:
    """Poll a Glances web server and e-mail an alert when a metric is too high.

    ``run()`` launches ``glances -w`` as a child process and starts a daemon
    thread that polls the Glances REST API. Each metric that exceeds its
    threshold triggers ``email_sender.send_email``; a metric whose alert was
    sent successfully is not alerted again for ``ALERT_INTERVAL`` seconds.
    """

    # Seconds to wait for one Glances API response before giving up, so a
    # stalled server cannot hang the monitoring thread forever.
    REQUEST_TIMEOUT = 5
    # Seconds between two polling rounds when everything works.
    POLL_INTERVAL = 10
    # Seconds to back off after a failed polling round.
    ERROR_BACKOFF = 60
    # Minimum seconds between two alert e-mails for the same metric.
    ALERT_INTERVAL = 3600
    # Seconds to wait for Glances to exit after terminate() before kill().
    STOP_TIMEOUT = 10

    def __init__(self, email_sender, host='localhost', port=61208,
                 cpu_threshold=80.0, load_threshold=None, io_threshold=80.0,
                 disk_usage_threshold=60.0, handle_threshold=20000,
                 recipient=None, api_version=4):
        """Store the configuration; nothing is started until ``run()``.

        Args:
            email_sender: Object exposing ``send_email(recipient, subject,
                body)`` that returns a truthy value on success.
            host (str): Host name used to build the Glances API URL.
            port (int): Port of the Glances web server.
            cpu_threshold (float): CPU usage threshold (%).
            load_threshold (float): 1-minute load threshold. When omitted
                (or falsy) it defaults to ``get_cpu_count() * 2``.
            io_threshold (float): Disk I/O threshold (MB/s).
            disk_usage_threshold (float): Filesystem usage threshold (%).
            handle_threshold (int): Threshold compared against the ``total``
                field of the ``processcount`` endpoint.
            recipient (str): Address that receives the alert e-mails.
            api_version (int): Version segment of the REST API path. The
                project pins Glances 4.x, which is expected to serve
                ``/api/4``; pass ``3`` for a Glances 3.x server.
        """
        self.host = host
        self.port = port
        # The API URL must exist before get_cpu_count() is called below;
        # otherwise the lookup fails and the fallback is silently used.
        self.api_url = f"http://{self.host}:{self.port}/api/{api_version}"
        self.cpu_threshold = cpu_threshold
        self.load_threshold = load_threshold or (self.get_cpu_count() * 2)
        self.io_threshold = io_threshold
        self.disk_usage_threshold = disk_usage_threshold
        self.handle_threshold = handle_threshold
        self.email_sender = email_sender
        self.recipient = recipient
        self.glances_process = None
        # metric name -> time.time() of the last successfully sent alert
        self.last_alert_times = {}
        self._running = False
        # Set by stop() so the polling thread wakes up from its wait at once.
        self._stop_event = threading.Event()
        self._monitor_thread = None

    def get_cpu_count(self):
        """Return the number of CPU cores, never less than 1.

        The Glances ``cpu`` endpoint is asked first. If it cannot be reached
        or reports no core count, the core count of the local machine is
        used, which is the machine ``start_glances()`` launches Glances on.
        """
        import os

        try:
            response = requests.get(f"{self.api_url}/cpu",
                                    timeout=self.REQUEST_TIMEOUT)
            response.raise_for_status()
            payload = response.json()
            # NOTE(review): Glances is believed to expose the core count as
            # "cpucore"; "count" is kept for compatibility with the original
            # lookup. Confirm against the deployed Glances version.
            cores = payload.get('cpucore') or payload.get('count')
            if cores:
                return cores
        except Exception:
            # Best effort: typically Glances is simply not running yet.
            pass
        return os.cpu_count() or 1

    def start_glances(self):
        """Launch ``glances -w`` on ``self.port`` as a child process.

        Raises:
            subprocess.CalledProcessError: ``glances --version`` failed.
            FileNotFoundError: The ``glances`` executable does not exist.
            RuntimeError: The server exited within two seconds of starting.
        """
        try:
            subprocess.run(['glances', '--version'], check=True,
                           stdout=subprocess.DEVNULL,
                           stderr=subprocess.DEVNULL)
            # The output is discarded instead of piped: nothing ever reads
            # the pipes, and a full pipe buffer would block the child.
            self.glances_process = subprocess.Popen(
                ['glances', '-w', '--port', str(self.port)],
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL
            )
            # Give the server a moment, then make sure it did not die.
            time.sleep(2)
            if self.glances_process.poll() is not None:
                raise RuntimeError("Glances 启动失败")
            print(f"Glances Web 服务已启动: http://{self.host}:{self.port}")
        except (subprocess.CalledProcessError, FileNotFoundError):
            # A missing executable raises FileNotFoundError rather than
            # CalledProcessError, so both lead to the install hint.
            print("错误: Glances 未安装。请运行: python3.11 -m pip install glances")
            raise
        except Exception as e:
            print(f"启动 Glances 失败: {e}")
            raise

    def stop_glances(self):
        """Terminate the Glances child process, killing it if it lingers."""
        process = self.glances_process
        if not process:
            return
        if process.poll() is None:
            process.terminate()
            try:
                process.wait(timeout=self.STOP_TIMEOUT)
            except subprocess.TimeoutExpired:
                process.kill()
                process.wait()
        self.glances_process = None
        print("Glances Web 服务已停止")

    def send_alert_email(self, metric, value, threshold):
        """Send an alert e-mail for ``metric``, at most once per hour.

        Nothing is sent when no sender or recipient is configured. The
        rate-limit timestamp is only updated when ``send_email`` reports
        success, so a failed delivery is retried on the next round.

        Args:
            metric (str): Display name of the metric; also the rate-limit key.
            value: Current value of the metric.
            threshold: Threshold that was exceeded.
        """
        if not self.email_sender or not self.recipient:
            return
        current_time = time.time()
        last_alert_time = self.last_alert_times.get(metric, 0)
        if current_time - last_alert_time < self.ALERT_INTERVAL:
            return
        timestamp = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime())
        subject = f"服务器告警: {metric} 过高"
        body = f"警告: {metric} 当前值为 {value},超过阈值 {threshold}\n时间: {timestamp}"
        if self.email_sender.send_email(self.recipient, subject, body):
            self.last_alert_times[metric] = current_time

    def _fetch(self, endpoint):
        """Return the decoded JSON payload of one Glances API endpoint."""
        response = requests.get(f"{self.api_url}/{endpoint}",
                                timeout=self.REQUEST_TIMEOUT)
        response.raise_for_status()
        return response.json()

    @staticmethod
    def _max_disk_io_mb(disks):
        """Return the highest combined read+write throughput (MB/s).

        Args:
            disks: Iterable of per-disk dicts from the ``diskio`` endpoint.

        Returns:
            The largest per-disk value in MiB per second, or 0 when empty.
        """
        mib = 1024 * 1024
        peak = 0
        for disk in disks:
            read_rate = disk.get('read_bytes_rate_per_sec')
            write_rate = disk.get('write_bytes_rate_per_sec')
            if read_rate is not None or write_rate is not None:
                # NOTE(review): presumably provided by Glances 4 as a
                # ready-made bytes/second rate; confirm field names.
                bytes_per_sec = (read_rate or 0) + (write_rate or 0)
            else:
                moved = (disk.get('read_bytes') or 0) + (disk.get('write_bytes') or 0)
                # NOTE(review): assumes read_bytes/write_bytes cover the
                # last "time_since_update" seconds; verify. Without that
                # field the raw byte count is used unchanged.
                elapsed = disk.get('time_since_update') or 0
                bytes_per_sec = moved / elapsed if elapsed > 0 else moved
            peak = max(peak, bytes_per_sec / mib)
        return peak

    def _check_once(self):
        """Run one polling round over all metrics and raise alerts."""
        cpu_usage = self._fetch("cpu/total").get('total', 0)
        if cpu_usage > self.cpu_threshold:
            self.send_alert_email("CPU 使用率", cpu_usage, self.cpu_threshold)

        load_avg = self._fetch("load").get('min1', 0)
        if load_avg > self.load_threshold:
            self.send_alert_email("系统负载1分钟", load_avg, self.load_threshold)

        max_io_usage = self._max_disk_io_mb(self._fetch("diskio"))
        if max_io_usage > self.io_threshold:
            self.send_alert_email("磁盘 I/OMB/s", max_io_usage, self.io_threshold)

        for fs in self._fetch("fs"):
            disk_usage = fs.get('percent', 0)
            if disk_usage > self.disk_usage_threshold:
                self.send_alert_email(f"磁盘占用 ({fs.get('mnt_point')})",
                                      disk_usage, self.disk_usage_threshold)

        # NOTE(review): the alert is labelled as a handle count, but the
        # endpoint name suggests "total" is a process count; confirm which
        # metric is intended.
        handle_count = self._fetch("processcount").get('total', 0)
        if handle_count > self.handle_threshold:
            self.send_alert_email("句柄数", handle_count, self.handle_threshold)

    def monitor(self):
        """Poll all metrics in a loop until ``stop()`` is called.

        A failed round is reported on stdout and followed by a longer pause;
        errors never terminate the loop.
        """
        while self._running:
            try:
                self._check_once()
                delay = self.POLL_INTERVAL
            except requests.RequestException as e:
                print(f"连接 Glances API 失败: {e}")
                delay = self.ERROR_BACKOFF
            except Exception as e:
                print(f"监控错误: {e}")
                delay = self.ERROR_BACKOFF
            # Unlike time.sleep(), this returns as soon as stop() is called.
            self._stop_event.wait(delay)

    def run(self):
        """Start Glances and the polling thread without blocking the caller.

        Raises:
            Exception: Whatever ``start_glances()`` raises; in that case no
                polling thread is started.
        """
        self.start_glances()
        # Only mark the monitor as running once Glances is actually up.
        self._stop_event.clear()
        self._running = True
        self._monitor_thread = threading.Thread(target=self.monitor, daemon=True)
        self._monitor_thread.start()

    def stop(self):
        """Stop the polling loop and shut down the Glances process."""
        self._running = False
        self._stop_event.set()
        self.stop_glances()

View File

@@ -17,6 +17,10 @@ APP_START_TIME = time.time()
@login_required
def api_docs():
return render_template('api_docs.html')
@system_bp.route('/system_status')
@login_required
def system_status():
    """Render the server-monitoring page for logged-in users."""
    # The view needs its own function name: Flask derives the endpoint from
    # it, and reusing "api_docs" would collide with the view defined above.
    return render_template('system_status.html')
# 页面路由
@system_bp.route('/wx_logs')
@login_required

View File

@@ -176,6 +176,10 @@
<i class="el-icon-document"></i>
<span slot="title">接口文档</span>
</el-menu-item>
<el-menu-item index="14">
<i class="el-icon-document"></i>
<span slot="title">服务器监控</span>
</el-menu-item>
</el-menu>
</div>
@@ -241,7 +245,8 @@
'10': '/contacts',
'11': '/plugins_manage',
'12': '/virtual_group',
'13':'/api_docs'
'13':'/api_docs',
'14':'/system_status'
};
// 如果当前不在对应页面,则跳转

View File

@@ -0,0 +1,53 @@
{% extends "base.html" %}
{% block title %}服务器监控 - 机器人管理后台{% endblock %}
{% block content %}
<!-- Server monitoring: embeds the Glances web UI in an iframe -->
<div>
    <el-row :gutter="20">
        <el-col :span="24">
            <el-card shadow="hover">
                <div slot="header">
                    <span>服务器监控</span>
                </div>
                <div class="iframe-container">
                    <iframe :src="glancesUrl" frameborder="0" width="100%" height="800px"></iframe>
                </div>
            </el-card>
        </el-col>
    </el-row>
</div>
{% endblock %}
{% block scripts %}
<script>
    new Vue({
        el: '#app',
        mixins: [baseApp],
        data() {
            return {
                // Menu index of this page, so the sidebar highlights it
                currentView: '14',
                // Build the URL from the host this page was loaded from.
                // A literal "localhost" is resolved by the visitor's browser
                // to the visitor's own machine, not to the server.
                // 61208 must match the port the Glances web server runs on.
                glancesUrl: 'http://' + window.location.hostname + ':61208/'
            }
        },
        mounted() {
            // Set the current view again once the page is mounted
            this.currentView = '14';
        }
    });
</script>
<style>
    .iframe-container {
        width: 100%;
        overflow: hidden;
    }
    .iframe-container iframe {
        border: none;
        width: 100%;
        min-height: 800px;
    }
</style>
{% endblock %}

19
main.py
View File

@@ -3,6 +3,7 @@
import asyncio
import threading
from admin.GlancesMonitor import GlancesMonitor
from utils.decorator.async_job import async_job
from configuration import Config
from plugins.xiuren_image.images_cache import ImageCacheManager
@@ -67,6 +68,24 @@ def main():
robot.LOG.error(f"Dashboard服务器启动失败: {e}")
asyncio.run(async_job.run_all())
try:
# 初始化 Glances 监控
monitor = GlancesMonitor(
email_sender=robot.email_sender,
host='localhost',
port=61208,
cpu_threshold=80.0,
load_threshold=None, # 自动设为 CPU 核心数 * 2
io_threshold=80.0,
disk_usage_threshold=60.0,
handle_threshold=20000,
recipient=config.email.get("alert_recipient")
)
monitor.run()
except Exception as e:
robot.LOG.error(f"GlancesMonitor服务器启动失败: {e}")
# 让机器人一直跑
robot.keep_running_and_block_process()

View File

@@ -49,3 +49,6 @@ pymediainfo~=7.0.1
loguru~=0.7.3
opencv-python~=4.11.0.86
pathlib~=1.0.1
Glances~=4.3.1
yagmail~=0.15.293