Files
abot/admin/GlancesMonitor.py
2025-09-26 15:30:19 +08:00

153 lines
6.4 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import time
import threading
import subprocess
import requests
from loguru import logger
class GlancesMonitor:
    """Poll a Glances REST API and e-mail alerts when thresholds are exceeded.

    ``run()`` launches a local ``glances -w`` process plus a daemon thread that
    polls the API; ``stop()`` ends both. Alerts are delivered through
    ``email_sender`` and rate-limited to one per metric per hour.
    """

    # Seconds to wait after a successful polling pass / after a failed one.
    _POLL_INTERVAL = 10
    _ERROR_BACKOFF = 60
    # Minimum seconds between two alert e-mails for the same metric.
    _ALERT_COOLDOWN = 3600
    # Seconds to wait for Glances to exit after terminate() before killing it.
    _TERMINATE_TIMEOUT = 10

    def __init__(self, email_sender, host='192.168.2.170', port=61208,
                 cpu_threshold=80.0, load_threshold=None, io_threshold=80.0,
                 disk_usage_threshold=60.0, handle_threshold=20000,
                 recipient=None, request_timeout=5.0):
        """Initialise the Glances monitoring component.

        Args:
            email_sender: An initialised EmailSender instance; it must expose
                ``send_email(recipient, subject, body)`` returning a truthy
                value on success. Alerts are skipped when this is falsy.
            host (str): Glances host address.
            port (int): Glances web server port.
            cpu_threshold (float): CPU usage threshold (%).
            load_threshold (float): 1-minute load threshold. When ``None`` it
                defaults to CPU core count * 2, queried from the Glances API
                at construction time (falls back to 2 if the API is not
                reachable yet, e.g. because ``run()`` has not started it).
            io_threshold (float): Disk I/O threshold (MB/s).
            disk_usage_threshold (float): Disk usage threshold (%).
            handle_threshold (int): Handle count threshold.
            recipient (str): Alert e-mail recipient; alerts are skipped when
                this is falsy.
            request_timeout (float): Timeout in seconds applied to every
                HTTP request sent to the Glances API.
        """
        self.host = host
        self.port = port
        self.api_url = f"http://{self.host}:{self.port}/api/4"
        # Must be set before get_cpu_count() is called below.
        self.request_timeout = request_timeout
        self.cpu_threshold = cpu_threshold
        # `is None` (not truthiness) so an explicit 0 is honoured and no
        # network call is made when the caller supplied a threshold.
        if load_threshold is None:
            load_threshold = self.get_cpu_count() * 2
        self.load_threshold = load_threshold
        self.io_threshold = io_threshold
        self.disk_usage_threshold = disk_usage_threshold
        self.handle_threshold = handle_threshold
        self.email_sender = email_sender
        self.recipient = recipient
        self.glances_process = None
        self.last_alert_times = {}
        self._running = False
        # Lets stop() wake the monitor thread instead of waiting out a sleep.
        self._stop_event = threading.Event()

    def _fetch_json(self, endpoint):
        """GET ``<api_url>/<endpoint>`` and return the decoded JSON body.

        Raises:
            requests.RequestException: on connection errors, timeouts or a
                non-2xx status.
        """
        response = requests.get(f"{self.api_url}/{endpoint}",
                                timeout=self.request_timeout)
        response.raise_for_status()
        return response.json()

    def get_cpu_count(self):
        """Return the CPU core count reported by Glances, or 1 on any failure."""
        try:
            data = self._fetch_json("cpu")
            # 'count' is the key this code has always read.
            # NOTE(review): Glances' cpu plugin appears to expose the core
            # count as 'cpucore' - confirm against the deployed version.
            return data.get('count') or data.get('cpucore') or 1
        except Exception as e:
            logger.error(e)
            return 1

    def start_glances(self):
        """Start the Glances web server as a child process.

        Raises:
            subprocess.CalledProcessError: ``glances --version`` failed.
            FileNotFoundError: the ``glances`` executable is not on PATH.
            RuntimeError: the server process exited right after launch.
        """
        try:
            subprocess.run(['glances', '--version'], check=True,
                           stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
            # Output is discarded: nothing ever reads these streams, and an
            # unread PIPE would eventually fill up and block the child.
            self.glances_process = subprocess.Popen(
                ['glances', '-w', '--port', str(self.port)],
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
            )
            # Give the server a moment to come up (or to fail fast).
            time.sleep(2)
            if self.glances_process.poll() is not None:
                self.glances_process = None
                raise RuntimeError("Glances 启动失败")
            logger.info(f"Glances Web 服务已启动: http://{self.host}:{self.port}")
        except (subprocess.CalledProcessError, FileNotFoundError):
            # A missing executable surfaces as FileNotFoundError, not as
            # CalledProcessError, so both mean "not installed / not usable".
            logger.error("错误: Glances 未安装。请运行: python3.11 -m pip install glances")
            raise
        except Exception as e:
            logger.error(f"启动 Glances 失败: {e}")
            raise

    def stop_glances(self):
        """Stop the Glances child process, if one was started."""
        process = self.glances_process
        if process is None:
            return
        process.terminate()
        try:
            process.wait(timeout=self._TERMINATE_TIMEOUT)
        except subprocess.TimeoutExpired:
            # The child ignored SIGTERM; do not hang the caller forever.
            process.kill()
            process.wait()
        self.glances_process = None
        logger.info("Glances Web 服务已停止")

    def send_alert_email(self, metric, value, threshold):
        """Send an alert e-mail for ``metric``, at most once per hour.

        Does nothing when no sender/recipient is configured. The cooldown
        timestamp is only recorded when ``send_email`` reports success, so a
        failed send is retried on the next breach.
        """
        if not self.email_sender or not self.recipient:
            return
        current_time = time.time()
        last_alert_time = self.last_alert_times.get(metric, 0)
        if current_time - last_alert_time < self._ALERT_COOLDOWN:
            return
        subject = f"服务器告警: {metric} 过高"
        body = f"警告: {metric} 当前值为 {value},超过阈值 {threshold}\n时间: {time.strftime('%Y-%m-%d %H:%M:%S', time.localtime())}"
        if self.email_sender.send_email(self.recipient, subject, body):
            self.last_alert_times[metric] = current_time

    def _check_once(self):
        """Run one polling pass over all metrics, alerting on each breach."""
        cpu_usage = self._fetch_json("cpu/total").get('total', 0)
        if cpu_usage > self.cpu_threshold:
            self.send_alert_email("CPU 使用率", cpu_usage, self.cpu_threshold)

        load_avg = self._fetch_json("load").get('min1', 0)
        if load_avg > self.load_threshold:
            self.send_alert_email("系统负载1分钟", load_avg, self.load_threshold)

        max_io_usage = 0
        for disk in self._fetch_json("diskio"):
            # `or 0` also covers keys that are present but null.
            transferred = (disk.get('read_bytes') or 0) + (disk.get('write_bytes') or 0)
            # NOTE(review): the divisor is 2 MiB, not 1 MiB; presumably the
            # byte counters cover Glances' default 2 s refresh window so this
            # yields MB/s - confirm against the Glances refresh setting.
            max_io_usage = max(max_io_usage, transferred / (2048 * 1024))
        if max_io_usage > self.io_threshold:
            self.send_alert_email("磁盘 I/OMB/s", max_io_usage, self.io_threshold)

        for fs in self._fetch_json("fs"):
            disk_usage = fs.get('percent', 0)
            if disk_usage > self.disk_usage_threshold:
                self.send_alert_email(f"磁盘占用 ({fs.get('mnt_point')})", disk_usage,
                                      self.disk_usage_threshold)

        # NOTE(review): this reads the total from /processcount but reports it
        # as a handle count - confirm which metric is actually intended.
        handle_count = self._fetch_json("processcount").get('total', 0)
        if handle_count > self.handle_threshold:
            self.send_alert_email("句柄数", handle_count, self.handle_threshold)

    def monitor(self):
        """Poll the metrics until stopped; intended to run in its own thread.

        Waits 10 s after a successful pass and 60 s after a failed one. Any
        failure aborts the current pass; the remaining metrics are checked
        again on the next pass.
        """
        while self._running:
            try:
                self._check_once()
                delay = self._POLL_INTERVAL
            except requests.RequestException as e:
                logger.error(f"连接 Glances API 失败: {e}")
                delay = self._ERROR_BACKOFF
            except Exception as e:
                logger.error(f"监控错误: {e}")
                delay = self._ERROR_BACKOFF
            # Interruptible sleep: returns True as soon as stop() is called.
            if self._stop_event.wait(delay):
                break

    def run(self):
        """Start the Glances server and the monitoring thread (non-blocking).

        A call made while the monitor is already running is ignored, so no
        second Glances process or polling thread is spawned.

        Raises:
            Whatever ``start_glances()`` raises; in that case the monitor is
            left in the stopped state.
        """
        if self._running:
            logger.warning("Glances 监控已在运行,忽略重复的 run() 调用")
            return
        self.start_glances()
        # Only mark as running once Glances is actually up.
        self._stop_event.clear()
        self._running = True
        monitor_thread = threading.Thread(target=self.monitor, daemon=True)
        monitor_thread.start()

    def stop(self):
        """Stop the monitoring loop and the Glances server."""
        self._running = False
        self._stop_event.set()
        self.stop_glances()