服务器监控
This commit is contained in:
149
admin/GlancesMonitor.py
Normal file
149
admin/GlancesMonitor.py
Normal file
@@ -0,0 +1,149 @@
|
||||
import time
|
||||
import threading
|
||||
import subprocess
|
||||
import requests
|
||||
|
||||
|
||||
class GlancesMonitor:
    """Run a local Glances web server and email alerts on threshold breaches.

    The monitor polls the Glances REST API for CPU usage, 1-minute load,
    disk I/O, filesystem usage and process count. Each metric triggers at
    most one alert email per ``ALERT_INTERVAL`` seconds.
    """

    # Minimum number of seconds between two alert emails for the same metric.
    ALERT_INTERVAL = 3600
    # Per-request timeout (seconds) so a stalled API cannot hang the monitor.
    REQUEST_TIMEOUT = 5
    # Delay (seconds) between two successful polling cycles.
    POLL_INTERVAL = 10
    # Delay (seconds) before retrying after a failed polling cycle.
    ERROR_BACKOFF = 60
    # Maximum time (seconds) to wait for the Glances process / monitor thread
    # to exit when stopping.
    STOP_TIMEOUT = 10

    def __init__(self, email_sender, host='localhost', port=61208,
                 cpu_threshold=80.0, load_threshold=None, io_threshold=80.0,
                 disk_usage_threshold=60.0, handle_threshold=20000,
                 recipient=None, api_version=4):
        """Initialise the Glances monitoring component.

        Args:
            email_sender: An initialised sender object exposing
                ``send_email(recipient, subject, body)``; alerts are
                disabled when it is falsy.
            host (str): Host name of the Glances web server.
            port (int): Port of the Glances web server.
            cpu_threshold (float): CPU usage threshold (%).
            load_threshold (float): 1-minute load threshold; when falsy it
                defaults to ``CPU count * 2``.
            io_threshold (float): Disk I/O threshold (MB/s).
            disk_usage_threshold (float): Filesystem usage threshold (%).
            handle_threshold (int): Threshold compared against the
                ``processcount`` total.
            recipient (str): Address that receives alert emails; alerts are
                disabled when it is falsy.
            api_version (int): Version segment of the Glances REST API URL.
                Defaults to 4 to match the Glances 4.x release pinned in
                the project's requirements.
        """
        self.host = host
        self.port = port
        # Must be set before get_cpu_count() is called below, because that
        # method builds its request URL from it.
        self.api_url = f"http://{self.host}:{self.port}/api/{api_version}"
        self.cpu_threshold = cpu_threshold
        self.load_threshold = load_threshold or (self.get_cpu_count() * 2)
        self.io_threshold = io_threshold
        self.disk_usage_threshold = disk_usage_threshold
        self.handle_threshold = handle_threshold
        self.email_sender = email_sender
        self.recipient = recipient
        self.glances_process = None
        # Maps metric name -> timestamp of the last alert that was sent.
        self.last_alert_times = {}
        self._running = False
        # Set by stop() so the monitor thread wakes up from its sleep early.
        self._stop_event = threading.Event()
        self._monitor_thread = None

    def _fetch(self, endpoint):
        """GET ``endpoint`` from the Glances API and return the decoded JSON.

        Raises:
            requests.RequestException: On connection errors, timeouts or a
                non-2xx response.
        """
        response = requests.get(f"{self.api_url}/{endpoint}",
                                timeout=self.REQUEST_TIMEOUT)
        response.raise_for_status()
        return response.json()

    def get_cpu_count(self):
        """Return the number of CPU cores (always >= 1).

        The Glances API is asked first. If it is unreachable (for example
        because ``run()`` has not started Glances yet) or reports no usable
        count, the core count of the local machine is used instead.
        """
        import os

        try:
            stats = self._fetch("cpu")
            # NOTE(review): Glances appears to expose the core count as
            # 'cpucore'; the original 'count' key is kept first for
            # compatibility - confirm against the deployed Glances version.
            count = stats.get('count') or stats.get('cpucore')
        except Exception:
            # Best effort: any failure falls through to the local count.
            count = None
        if isinstance(count, int) and count > 0:
            return count
        return os.cpu_count() or 1

    def start_glances(self):
        """Start the Glances web server as a child process.

        Raises:
            FileNotFoundError: If the ``glances`` executable is missing.
            subprocess.CalledProcessError: If ``glances --version`` fails.
            RuntimeError: If the server exits right after being launched.
        """
        try:
            subprocess.run(['glances', '--version'], check=True,
                           stdout=subprocess.DEVNULL,
                           stderr=subprocess.DEVNULL)
            # Output is discarded rather than piped: nothing ever reads the
            # pipes, so a full pipe buffer would eventually block the child.
            self.glances_process = subprocess.Popen(
                ['glances', '-w', '--port', str(self.port)],
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
            )
            # Give the server a moment to bind the port or fail.
            time.sleep(2)
            if self.glances_process.poll() is not None:
                self.glances_process = None
                raise RuntimeError("Glances 启动失败")
            print(f"Glances Web 服务已启动: http://{self.host}:{self.port}")
        except (FileNotFoundError, subprocess.CalledProcessError):
            # A missing executable raises FileNotFoundError, not
            # CalledProcessError, so both must produce the install hint.
            print("错误: Glances 未安装。请运行: python3.11 -m pip install glances")
            raise
        except Exception as e:
            print(f"启动 Glances 失败: {e}")
            raise

    def stop_glances(self):
        """Terminate the Glances child process, killing it if it hangs."""
        process = self.glances_process
        if not process:
            return
        process.terminate()
        try:
            process.wait(timeout=self.STOP_TIMEOUT)
        except subprocess.TimeoutExpired:
            process.kill()
            process.wait()
        self.glances_process = None
        print("Glances Web 服务已停止")

    def send_alert_email(self, metric, value, threshold):
        """Send an alert email for ``metric``, at most once per hour.

        Nothing is sent when no sender or recipient is configured. The
        throttle timestamp is only updated when ``send_email`` reports
        success, so a failed delivery is retried on the next breach.
        """
        if not self.email_sender or not self.recipient:
            return
        current_time = time.time()
        last_alert_time = self.last_alert_times.get(metric, 0)
        if current_time - last_alert_time < self.ALERT_INTERVAL:
            return
        subject = f"服务器告警: {metric} 过高"
        body = f"警告: {metric} 当前值为 {value},超过阈值 {threshold}!\n时间: {time.strftime('%Y-%m-%d %H:%M:%S', time.localtime())}"
        if self.email_sender.send_email(self.recipient, subject, body):
            self.last_alert_times[metric] = current_time

    def _check_metrics(self):
        """Poll every metric once and send alerts for threshold breaches."""
        cpu_usage = self._fetch("cpu/total").get('total', 0)
        if cpu_usage > self.cpu_threshold:
            self.send_alert_email("CPU 使用率", cpu_usage, self.cpu_threshold)

        load_avg = self._fetch("load").get('min1', 0)
        if load_avg > self.load_threshold:
            self.send_alert_email("系统负载(1分钟)", load_avg, self.load_threshold)

        max_io_usage = 0
        for disk in self._fetch("diskio"):
            transferred = (disk.get('read_bytes') or 0) + (disk.get('write_bytes') or 0)
            # NOTE(review): the byte counters look like deltas since the
            # previous Glances refresh; dividing by 'time_since_update'
            # (when reported) yields the per-second rate that the MB/s
            # threshold expects. Without that field the raw value is used.
            elapsed = disk.get('time_since_update') or 1
            io_usage = transferred / elapsed / (1024 * 1024)
            max_io_usage = max(max_io_usage, io_usage)
        if max_io_usage > self.io_threshold:
            self.send_alert_email("磁盘 I/O(MB/s)", max_io_usage, self.io_threshold)

        for fs in self._fetch("fs"):
            disk_usage = fs.get('percent', 0)
            if disk_usage > self.disk_usage_threshold:
                self.send_alert_email(f"磁盘占用 ({fs.get('mnt_point')})", disk_usage,
                                      self.disk_usage_threshold)

        # NOTE(review): the endpoint name suggests this total is a process
        # count rather than a handle count - confirm it matches the label.
        handle_count = self._fetch("processcount").get('total', 0)
        if handle_count > self.handle_threshold:
            self.send_alert_email("句柄数", handle_count, self.handle_threshold)

    def monitor(self):
        """Poll the metrics in a loop until ``stop()`` is called.

        A successful cycle is followed by a ``POLL_INTERVAL`` pause and a
        failed one by ``ERROR_BACKOFF``. Both pauses end early on stop.
        """
        while self._running:
            try:
                self._check_metrics()
                delay = self.POLL_INTERVAL
            except requests.RequestException as e:
                print(f"连接 Glances API 失败: {e}")
                delay = self.ERROR_BACKOFF
            except Exception as e:
                print(f"监控错误: {e}")
                delay = self.ERROR_BACKOFF
            self._stop_event.wait(delay)

    def run(self):
        """Start Glances and the background monitor thread (non-blocking).

        Calling ``run()`` while the monitor is already running is a no-op.
        If Glances fails to start, the exception propagates and the monitor
        is left in the stopped state.
        """
        if self._running:
            return
        self.start_glances()
        # Only mark as running once Glances has actually started.
        self._stop_event.clear()
        self._running = True
        self._monitor_thread = threading.Thread(target=self.monitor, daemon=True)
        self._monitor_thread.start()

    def stop(self):
        """Stop the monitor thread and the Glances child process."""
        self._running = False
        self._stop_event.set()
        thread = self._monitor_thread
        if thread is not None and thread is not threading.current_thread():
            # Bounded wait: the thread is a daemon, so a cycle stuck in a
            # request must not block shutdown forever.
            thread.join(timeout=self.STOP_TIMEOUT)
        self._monitor_thread = None
        self.stop_glances()
|
||||
@@ -17,6 +17,10 @@ APP_START_TIME = time.time()
|
||||
@login_required
|
||||
def api_docs():
|
||||
return render_template('api_docs.html')
|
||||
@system_bp.route('/system_status')
@login_required
def system_status():
    """Render the server monitoring page.

    The view needs its own name: reusing ``api_docs`` would shadow the view
    above and register two different functions under one endpoint name.
    """
    return render_template('system_status.html')
|
||||
# 页面路由
|
||||
@system_bp.route('/wx_logs')
|
||||
@login_required
|
||||
|
||||
@@ -176,6 +176,10 @@
|
||||
<i class="el-icon-document"></i>
|
||||
<span slot="title">接口文档</span>
|
||||
</el-menu-item>
|
||||
<el-menu-item index="14">
|
||||
<i class="el-icon-document"></i>
|
||||
<span slot="title">服务器监控</span>
|
||||
</el-menu-item>
|
||||
</el-menu>
|
||||
</div>
|
||||
|
||||
@@ -241,7 +245,8 @@
|
||||
'10': '/contacts',
|
||||
'11': '/plugins_manage',
|
||||
'12': '/virtual_group',
|
||||
'13':'/api_docs'
|
||||
'13':'/api_docs',
|
||||
'14':'/system_status'
|
||||
};
|
||||
|
||||
// 如果当前不在对应页面,则跳转
|
||||
|
||||
53
admin/dashboard/templates/system_status.html
Normal file
53
admin/dashboard/templates/system_status.html
Normal file
@@ -0,0 +1,53 @@
|
||||
{% extends "base.html" %}
|
||||
|
||||
{% block title %}服务器监控 - 机器人管理后台{% endblock %}
|
||||
|
||||
{% block content %}
|
||||
<!-- Server monitoring (embedded Glances web UI) -->
|
||||
<div>
|
||||
<el-row :gutter="20">
|
||||
<el-col :span="24">
|
||||
<el-card shadow="hover">
|
||||
<div slot="header">
|
||||
<span>服务器监控</span>
|
||||
</div>
|
||||
<div class="iframe-container">
|
||||
<iframe src="http://localhost:61208/" frameborder="0" width="100%" height="800px"></iframe>
|
||||
</div>
|
||||
</el-card>
|
||||
</el-col>
|
||||
</el-row>
|
||||
</div>
|
||||
{% endblock %}
|
||||
|
||||
{% block scripts %}
|
||||
<script>
|
||||
new Vue({
|
||||
el: '#app',
|
||||
mixins: [baseApp],
|
||||
data() {
|
||||
return {
|
||||
// 设置当前菜单项
|
||||
currentView: '14' // 使用一个新的索引,确保在菜单中能正确高亮
|
||||
}
|
||||
},
|
||||
mounted() {
|
||||
// 页面加载时设置当前视图
|
||||
this.currentView = '14';
|
||||
}
|
||||
});
|
||||
</script>
|
||||
|
||||
<style>
|
||||
.iframe-container {
|
||||
width: 100%;
|
||||
overflow: hidden;
|
||||
}
|
||||
|
||||
.iframe-container iframe {
|
||||
border: none;
|
||||
width: 100%;
|
||||
min-height: 800px;
|
||||
}
|
||||
</style>
|
||||
{% endblock %}
|
||||
19
main.py
19
main.py
@@ -3,6 +3,7 @@
|
||||
import asyncio
|
||||
import threading
|
||||
|
||||
from admin.GlancesMonitor import GlancesMonitor
|
||||
from utils.decorator.async_job import async_job
|
||||
from configuration import Config
|
||||
from plugins.xiuren_image.images_cache import ImageCacheManager
|
||||
@@ -67,6 +68,24 @@ def main():
|
||||
robot.LOG.error(f"Dashboard服务器启动失败: {e}")
|
||||
|
||||
asyncio.run(async_job.run_all())
|
||||
|
||||
try:
|
||||
# 初始化 Glances 监控
|
||||
monitor = GlancesMonitor(
|
||||
email_sender=robot.email_sender,
|
||||
host='localhost',
|
||||
port=61208,
|
||||
cpu_threshold=80.0,
|
||||
load_threshold=None, # 自动设为 CPU 核心数 * 2
|
||||
io_threshold=80.0,
|
||||
disk_usage_threshold=60.0,
|
||||
handle_threshold=20000,
|
||||
recipient=config.email.get("alert_recipient")
|
||||
)
|
||||
monitor.run()
|
||||
except Exception as e:
|
||||
robot.LOG.error(f"GlancesMonitor服务器启动失败: {e}")
|
||||
|
||||
# 让机器人一直跑
|
||||
robot.keep_running_and_block_process()
|
||||
|
||||
|
||||
@@ -49,3 +49,6 @@ pymediainfo~=7.0.1
|
||||
loguru~=0.7.3
|
||||
opencv-python~=4.11.0.86
|
||||
pathlib~=1.0.1
|
||||
|
||||
Glances~=4.3.1
|
||||
yagmail~=0.15.293
|
||||
Reference in New Issue
Block a user