import subprocess
import threading
import time

import requests
from loguru import logger


class GlancesMonitor:
    """Poll a Glances REST API and e-mail an alert when a metric exceeds its threshold.

    The monitor can launch a local ``glances -w`` web server, then polls the
    ``/api/4`` endpoints from a daemon thread. Alerts are rate-limited per
    metric so a sustained condition produces at most one e-mail per cooldown
    window.
    """

    # Seconds to wait for a single Glances API response before giving up.
    REQUEST_TIMEOUT = 5.0
    # Seconds between two successful polling cycles.
    POLL_INTERVAL = 10.0
    # Seconds to back off after a failed polling cycle.
    ERROR_BACKOFF = 60.0
    # Minimum seconds between two alert e-mails for the same metric.
    ALERT_COOLDOWN = 3600
    # Sampling interval assumed when a diskio entry carries no
    # ``time_since_update`` field (the original code hard-coded 2 seconds).
    DEFAULT_REFRESH_SECONDS = 2.0
    BYTES_PER_MB = 1024 * 1024

    def __init__(self, email_sender, host='192.168.2.170', port=61208,
                 cpu_threshold=80.0, load_threshold=None, io_threshold=80.0,
                 disk_usage_threshold=80.0, handle_threshold=20000,
                 recipient=None):
        """Initialise the Glances monitoring component.

        Args:
            email_sender: An already-initialised sender object exposing
                ``send_email(recipient, subject, body)`` that returns a
                truthy value on success.
            host (str): Address of the Glances host.
            port (int): Port of the Glances web server.
            cpu_threshold (float): CPU usage threshold (%).
            load_threshold (float): 1-minute load threshold. A falsy value
                means "automatic": CPU core count * 2.
            io_threshold (float): Disk I/O threshold (MB/s).
            disk_usage_threshold (float): Filesystem usage threshold (%).
            handle_threshold (int): Threshold compared against the ``total``
                field of the ``processcount`` endpoint.
            recipient (str): Address that receives alert e-mails.
        """
        self.host = host
        self.port = port
        self.api_url = f"http://{self.host}:{self.port}/api/4"
        self.cpu_threshold = cpu_threshold
        self.io_threshold = io_threshold
        self.disk_usage_threshold = disk_usage_threshold
        self.handle_threshold = handle_threshold
        self.email_sender = email_sender
        self.recipient = recipient
        self.glances_process = None
        self.last_alert_times = {}
        self._running = False
        self._stop_event = threading.Event()
        self._monitor_thread = None

        # When the threshold is automatic and Glances is not reachable yet
        # (run() starts it later), remember that the core count still has to
        # be resolved instead of silently keeping the 1-core fallback.
        self._load_threshold_pending = False
        if load_threshold:
            self.load_threshold = load_threshold
        else:
            cores = self._query_cpu_count()
            self._load_threshold_pending = cores is None
            self.load_threshold = (cores or 1) * 2

    def _fetch(self, endpoint):
        """Return the decoded JSON payload of ``<api_url>/<endpoint>``.

        Raises:
            requests.RequestException: On connection failure, timeout or a
                non-2xx status code.
        """
        response = requests.get(f"{self.api_url}/{endpoint}",
                                timeout=self.REQUEST_TIMEOUT)
        response.raise_for_status()
        return response.json()

    def _query_cpu_count(self):
        """Return the CPU core count reported by Glances, or None if unavailable."""
        try:
            data = self._fetch("cpu")
            # NOTE(review): Glances appears to expose the core count as
            # ``cpucore``; ``count`` is kept as a fallback because the
            # original code read it. Confirm against the deployed version.
            count = data.get('cpucore') or data.get('count')
        except Exception as e:
            logger.error(e)
            return None
        return count or None

    def get_cpu_count(self):
        """Return the number of CPU cores, falling back to 1 on any failure."""
        count = self._query_cpu_count()
        return 1 if count is None else count

    def start_glances(self):
        """Start the Glances web server as a child process.

        Raises:
            subprocess.CalledProcessError: ``glances --version`` failed.
            FileNotFoundError: The ``glances`` executable is not installed.
            RuntimeError: The server process exited right after launch.
        """
        try:
            subprocess.run(['glances', '--version'], check=True,
                           stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
            # Output is discarded rather than piped: nothing ever reads the
            # pipes, so a chatty server would eventually block on a full
            # pipe buffer.
            self.glances_process = subprocess.Popen(
                ['glances', '-w', '--port', str(self.port)],
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
            )
            # Give the server a moment to bind, then make sure it survived.
            time.sleep(2)
            if self.glances_process.poll() is not None:
                raise RuntimeError("Glances 启动失败")
            logger.info(f"Glances Web 服务已启动: http://{self.host}:{self.port}")
        except (subprocess.CalledProcessError, FileNotFoundError):
            # A missing executable raises FileNotFoundError, not
            # CalledProcessError, so both must map to "not installed".
            logger.error("错误: Glances 未安装。请运行: python3.11 -m pip install glances")
            raise
        except Exception as e:
            logger.error(f"启动 Glances 失败: {e}")
            raise

    def stop_glances(self):
        """Terminate the Glances child process, killing it if it will not exit."""
        process = self.glances_process
        if not process:
            return
        process.terminate()
        try:
            process.wait(timeout=10)
        except subprocess.TimeoutExpired:
            process.kill()
            process.wait()
        self.glances_process = None
        logger.info("Glances Web 服务已停止")

    def send_alert_email(self, metric, value, threshold):
        """Send an alert e-mail for ``metric``, at most once per cooldown window.

        Does nothing when no sender or recipient is configured. The cooldown
        timestamp is only recorded when the sender reports success, so a
        failed delivery is retried on the next breach.
        """
        if not self.email_sender or not self.recipient:
            return

        current_time = time.time()
        last_alert_time = self.last_alert_times.get(metric, 0)
        if current_time - last_alert_time < self.ALERT_COOLDOWN:
            return

        subject = f"服务器告警: {metric} 过高"
        body = f"警告: {metric} 当前值为 {value},超过阈值 {threshold}!\n时间: {time.strftime('%Y-%m-%d %H:%M:%S', time.localtime())}"
        if self.email_sender.send_email(self.recipient, subject, body):
            self.last_alert_times[metric] = current_time

    def _alert_if_exceeded(self, metric, value, threshold):
        """Send an alert when ``value`` is present and above ``threshold``."""
        # Glances may report null for a metric it has not sampled yet.
        if value is not None and value > threshold:
            self.send_alert_email(metric, value, threshold)

    def _check_cpu(self):
        """Compare total CPU usage with the CPU threshold."""
        cpu_usage = self._fetch("cpu/total").get('total', 0)
        self._alert_if_exceeded("CPU 使用率", cpu_usage, self.cpu_threshold)

    def _check_load(self):
        """Compare the 1-minute load average with the load threshold."""
        load_avg = self._fetch("load").get('min1', 0)
        self._alert_if_exceeded("系统负载(1分钟)", load_avg, self.load_threshold)

    def _check_disk_io(self):
        """Compare the busiest disk's throughput (MB/s) with the I/O threshold."""
        max_io_usage = 0
        for disk in self._fetch("diskio"):
            transferred = (disk.get('read_bytes') or 0) + (disk.get('write_bytes') or 0)
            # NOTE(review): assumes the byte counters cover the interval given
            # by ``time_since_update`` (seconds); falls back to the 2 s the
            # original code assumed when the field is missing.
            elapsed = disk.get('time_since_update') or self.DEFAULT_REFRESH_SECONDS
            max_io_usage = max(max_io_usage, transferred / (elapsed * self.BYTES_PER_MB))
        self._alert_if_exceeded("磁盘 I/O(MB/s)", max_io_usage, self.io_threshold)

    def _check_disk_usage(self):
        """Compare every filesystem's usage percentage with the usage threshold."""
        for fs in self._fetch("fs"):
            self._alert_if_exceeded(f"磁盘占用 ({fs.get('mnt_point')})",
                                    fs.get('percent', 0),
                                    self.disk_usage_threshold)

    def _check_process_count(self):
        """Compare the ``processcount`` total with the handle threshold."""
        # NOTE(review): the alert is labelled as a handle count but the value
        # read here is the total of the processcount endpoint.
        handle_count = self._fetch("processcount").get('total', 0)
        self._alert_if_exceeded("句柄数", handle_count, self.handle_threshold)

    def monitor(self):
        """Poll all metrics until stopped, alerting on every threshold breach.

        A failing cycle is logged and followed by a longer back-off; the loop
        itself never raises. Waiting is done on the stop event so stop()
        takes effect immediately instead of after a full sleep.
        """
        checks = (
            self._check_cpu,
            self._check_load,
            self._check_disk_io,
            self._check_disk_usage,
            self._check_process_count,
        )
        while self._running and not self._stop_event.is_set():
            delay = self.POLL_INTERVAL
            try:
                for check in checks:
                    check()
            except requests.RequestException as e:
                logger.error(f"连接 Glances API 失败: {e}")
                delay = self.ERROR_BACKOFF
            except Exception as e:
                logger.error(f"监控错误: {e}")
                delay = self.ERROR_BACKOFF
            self._stop_event.wait(delay)

    def run(self):
        """Start the Glances server and the monitoring thread (non-blocking).

        Calling run() while a monitor thread is already alive is a no-op.

        Raises:
            Exception: Whatever start_glances() raises; in that case no
                thread is started and the monitor stays stopped.
        """
        thread = self._monitor_thread
        if thread is not None and thread.is_alive():
            logger.warning("监控已在运行,忽略重复启动")
            return

        self.start_glances()

        # The API was most likely unreachable during __init__, so resolve the
        # automatic load threshold now that the server is up.
        if self._load_threshold_pending:
            cores = self._query_cpu_count()
            if cores is not None:
                self.load_threshold = cores * 2
                self._load_threshold_pending = False

        # Only flag the monitor as running once start-up has succeeded.
        self._stop_event.clear()
        self._running = True
        self._monitor_thread = threading.Thread(target=self.monitor, daemon=True)
        self._monitor_thread.start()

    def stop(self):
        """Stop the monitoring thread and the Glances server."""
        self._running = False
        self._stop_event.set()
        thread = self._monitor_thread
        if thread is not None and thread is not threading.current_thread():
            # Bounded wait: the thread is a daemon, so a cycle that is stuck
            # in a request must not block shutdown.
            thread.join(timeout=self.REQUEST_TIMEOUT)
        self._monitor_thread = None
        self.stop_glances()