feat: 优化整体项目

This commit is contained in:
2025-12-05 18:06:13 +08:00
parent b4df26f61d
commit 7d3ef70093
13 changed files with 2661 additions and 305 deletions

362
bot.py
View File

@@ -2,10 +2,19 @@
WechatHookBot - 主入口
基于个微大客户版 Hook API 的微信机器人框架
优化功能:
- 优先级消息队列
- 自适应熔断器
- 配置热更新
- 性能监控
- 优雅关闭
"""
import asyncio
import signal
import sys
import time
import tomllib
from pathlib import Path
from loguru import logger
@@ -13,6 +22,8 @@ from loguru import logger
from WechatHook import NoveLoader, WechatHookClient
from WechatHook.callbacks import (
add_callback_handler,
remove_callback_handler,
clear_all_callbacks,
wechat_connect_callback,
wechat_recv_callback,
wechat_close_callback,
@@ -23,7 +34,15 @@ from WechatHook.callbacks import (
from utils.hookbot import HookBot
from utils.plugin_manager import PluginManager
from utils.decorators import scheduler
# from database import KeyvalDB, MessageDB # 不需要数据库
from utils.bot_utils import (
PriorityMessageQueue,
MessagePriority,
PRIORITY_MESSAGE_TYPES,
AdaptiveCircuitBreaker,
ConfigWatcher,
PerformanceMonitor,
get_performance_monitor
)
class BotService:
@@ -37,17 +56,24 @@ class BotService:
self.process_id = None # 微信进程 ID
self.socket_client_id = None # Socket 客户端 ID
self.is_running = False
self.is_shutting_down = False # 是否正在关闭
self.event_loop = None # 事件循环引用
# 消息队列和性能控制
self.message_queue = None
self.message_queue: PriorityMessageQueue = None # 优先级消息队列
self.queue_config = {}
self.concurrency_config = {}
self.consumer_tasks = []
self.processing_semaphore = None
self.circuit_breaker_failures = 0
self.circuit_breaker_open = False
self.circuit_breaker_last_failure = 0
# 自适应熔断器
self.circuit_breaker: AdaptiveCircuitBreaker = None
# 配置热更新
self.config_watcher: ConfigWatcher = None
# 性能监控
self.performance_monitor: PerformanceMonitor = None
@CONNECT_CALLBACK(in_class=True)
def on_connect(self, client_id):
@@ -85,118 +111,125 @@ class BotService:
logger.error(f"消息入队失败: {e}")
async def _enqueue_message(self, msg_type, data):
    """Put one incoming message on the priority queue, applying the overflow policy.

    Args:
        msg_type: Message type identifier. It is also the key used to look
            up the message's priority in ``PRIORITY_MESSAGE_TYPES``
            (falling back to ``MessagePriority.NORMAL``).
        data: Message payload; handed to the queue unchanged.

    When the queue reports ``full()``, ``queue_config["overflow_strategy"]``
    selects what happens:

    * ``"drop_oldest"`` (the default): ask the queue to evict its
      lowest-priority entry, then enqueue the new message.
    * ``"sampling"``: discard the new message with 50% probability, unless
      its priority is at least ``MessagePriority.HIGH``.
    * any other value (degrade): discard the new message, unless its
      priority is at least ``MessagePriority.HIGH``.

    Every discarded message is reported to the performance monitor when one
    is configured. This coroutine never raises: any exception is logged
    and swallowed.
    """
    monitor = self.performance_monitor
    try:
        # Count the message as received before any overflow decision.
        if monitor:
            monitor.record_message_received()

        if self.message_queue.full():
            overflow_strategy = self.queue_config.get("overflow_strategy", "drop_oldest")

            if overflow_strategy == "drop_oldest":
                # Despite the strategy name, the victim is chosen by
                # priority (lowest first), not by age.
                if self.message_queue.drop_lowest_priority():
                    logger.warning("队列已满,丢弃优先级最低的消息")
                    if monitor:
                        monitor.record_message_dropped()
            else:
                # Both remaining strategies protect high-priority messages,
                # so the priority is looked up once for either of them.
                priority = PRIORITY_MESSAGE_TYPES.get(msg_type, MessagePriority.NORMAL)
                if priority < MessagePriority.HIGH:
                    if overflow_strategy == "sampling":
                        import random
                        # Sampling: drop roughly half of the low-priority traffic.
                        if random.random() < 0.5:
                            logger.debug("队列压力大,采样丢弃消息")
                            if monitor:
                                monitor.record_message_dropped()
                            return
                    else:  # degrade
                        logger.warning("队列已满,降级处理")
                        if monitor:
                            monitor.record_message_dropped()
                        return

        # NOTE(review): the queue can still be full here (a high-priority
        # message, or drop_lowest_priority() evicted nothing). Whether
        # put() then blocks depends on PriorityMessageQueue - confirm.
        await self.message_queue.put(msg_type, data)

        # Sample the queue depth after the insert.
        if monitor:
            monitor.record_queue_size(self.message_queue.qsize())

    except Exception as e:
        logger.error(f"消息入队异常: {e}")
async def _message_consumer(self, consumer_id: int):
    """Consume queued messages strictly one at a time (serial mode).

    Each iteration waits up to one second for a message, skips it if the
    circuit breaker is open, otherwise processes it to completion, marks it
    done on the queue, refreshes the breaker statistics held by the
    performance monitor, and finally sleeps for
    ``concurrency_config["message_interval_ms"]`` (default 100 ms) so
    that replies are not sent too quickly.

    Loop lifetime:

    * Normal operation: runs while ``self.is_running`` is true.
    * Shutdown (``self.is_shutting_down`` is true): keeps draining until
      the queue is empty. ``stop()`` sets ``is_shutting_down`` before it
      awaits ``message_queue.join()``; if the consumer exited immediately
      that join could only ever end by timing out.
    * Cancellation always ends the loop.

    Args:
        consumer_id: Number used only to label this consumer's log lines.
    """
    logger.info(f"消息消费者 {consumer_id} 已启动(串行模式)")

    def should_continue() -> bool:
        # While shutting down, stay alive only as long as work is left.
        if self.is_shutting_down:
            return not self.message_queue.empty()
        return self.is_running

    while should_continue():
        try:
            # Bounded wait so the loop condition is re-checked regularly.
            msg_type, data = await asyncio.wait_for(
                self.message_queue.get(),
                timeout=1.0
            )

            try:
                # An open breaker means the message is discarded unprocessed.
                if self.circuit_breaker and self.circuit_breaker.is_open():
                    logger.debug("熔断器开启,跳过消息处理")
                    self.circuit_breaker.record_rejection()
                    continue

                # Serial processing: wait for this message to finish
                # before fetching the next one.
                await self._process_dequeued_message(msg_type, data)
            finally:
                # Every successful get() is matched by exactly one
                # task_done(), even if something above raised or the task
                # was cancelled; otherwise queue.join() would never return.
                self.message_queue.task_done()

            # Publish the latest breaker statistics to the monitor.
            if self.performance_monitor and self.circuit_breaker:
                self.performance_monitor.update_circuit_breaker_stats(
                    self.circuit_breaker.get_stats()
                )

            # Pause between messages to avoid sending too fast.
            message_interval = self.concurrency_config.get("message_interval_ms", 100)
            if message_interval > 0:
                await asyncio.sleep(message_interval / 1000.0)

        except asyncio.TimeoutError:
            # Nothing arrived within the wait window; poll again.
            continue
        except asyncio.CancelledError:
            logger.info(f"消费者 {consumer_id} 收到取消信号")
            break
        except Exception as e:
            logger.error(f"消费者 {consumer_id} 异常: {e}")
            await asyncio.sleep(0.1)  # brief pause to avoid a busy loop

    logger.info(f"消费者 {consumer_id} 已退出")

async def _process_dequeued_message(self, msg_type, data):
    """Run one message through ``hookbot.process_message`` with a timeout.

    The timeout comes from
    ``concurrency_config["plugin_task_timeout_seconds"]`` (default 720).
    Success, timeout and failure are reported to the circuit breaker and
    the performance monitor when those are configured. Timeouts and
    ordinary exceptions are logged, not raised.
    """
    timeout = self.concurrency_config.get("plugin_task_timeout_seconds", 720)

    def record_failure():
        if self.circuit_breaker:
            self.circuit_breaker.record_failure()
        if self.performance_monitor:
            self.performance_monitor.record_message_failed()

    # Monotonic clock: the measured duration cannot be distorted by
    # wall-clock adjustments.
    started = time.monotonic()
    try:
        await asyncio.wait_for(
            self.hookbot.process_message(msg_type, data),
            timeout=timeout
        )
        processing_time = time.monotonic() - started
        if self.circuit_breaker:
            self.circuit_breaker.record_success()
        if self.performance_monitor:
            self.performance_monitor.record_message_processed(processing_time)
    except asyncio.TimeoutError:
        logger.warning(f"消息处理超时 (>{timeout}s): type={msg_type}")
        record_failure()
    except Exception as e:
        logger.error(f"消息处理异常: {e}")
        record_failure()

# --- Legacy counter-based circuit breaker helpers ---------------------------
# _message_consumer above consults self.circuit_breaker and does not call
# these; they operate on the circuit_breaker_failures / circuit_breaker_open /
# circuit_breaker_last_failure counters.

def _check_circuit_breaker(self) -> bool:
    """Return True while the counter-based breaker is open.

    Always returns False when ``enable_circuit_breaker`` is disabled in the
    concurrency config. An open breaker closes itself (and resets its
    failure count) once more than 30 seconds have passed since the failure
    that opened it.
    """
    if not self.concurrency_config.get("enable_circuit_breaker", True):
        return False

    if self.circuit_breaker_open:
        # Try to recover 30 seconds after the recorded failure time.
        if time.time() - self.circuit_breaker_last_failure > 30:
            self.circuit_breaker_open = False
            self.circuit_breaker_failures = 0
            logger.info("熔断器尝试恢复")
            return False
        return True
    return False

def _record_circuit_breaker_failure(self):
    """Count one failure and open the breaker once the threshold is reached.

    The threshold is ``concurrency_config["circuit_breaker_threshold"]``
    (default 5). Does nothing when the breaker is disabled in the config.
    """
    if not self.concurrency_config.get("enable_circuit_breaker", True):
        return

    self.circuit_breaker_failures += 1
    threshold = self.concurrency_config.get("circuit_breaker_threshold", 5)

    if self.circuit_breaker_failures >= threshold:
        self.circuit_breaker_open = True
        self.circuit_breaker_last_failure = time.time()
        logger.warning(f"熔断器开启,连续失败 {self.circuit_breaker_failures}")

def _reset_circuit_breaker(self):
    """Clear the consecutive-failure counter if it is non-zero."""
    if self.circuit_breaker_failures > 0:
        self.circuit_breaker_failures = 0
@CLOSE_CALLBACK(in_class=True)
def on_close(self, client_id):
@@ -235,17 +268,37 @@ class BotService:
# 初始化性能配置
self.queue_config = config.get("Queue", {})
self.concurrency_config = config.get("Concurrency", {})
# 创建消息队列
# 创建优先级消息队列
queue_size = self.queue_config.get("max_size", 1000)
self.message_queue = asyncio.Queue(maxsize=queue_size)
logger.info(f"消息队列已创建,容量: {queue_size}")
self.message_queue = PriorityMessageQueue(maxsize=queue_size)
logger.info(f"优先级消息队列已创建,容量: {queue_size}")
# 创建并发控制信号量
max_concurrency = self.concurrency_config.get("plugin_max_concurrency", 8)
self.processing_semaphore = asyncio.Semaphore(max_concurrency)
logger.info(f"并发控制已设置,最大并发: {max_concurrency}")
# 创建自适应熔断器
if self.concurrency_config.get("enable_circuit_breaker", True):
self.circuit_breaker = AdaptiveCircuitBreaker(
failure_threshold=self.concurrency_config.get("circuit_breaker_threshold", 10),
success_threshold=3,
initial_recovery_time=5.0,
max_recovery_time=300.0
)
logger.info("自适应熔断器已创建")
# 创建性能监控器
self.performance_monitor = get_performance_monitor()
logger.info("性能监控器已创建")
# 创建配置热更新监听器
self.config_watcher = ConfigWatcher("main_config.toml", check_interval=5.0)
self.config_watcher.register_callback(self._on_config_update)
await self.config_watcher.start()
logger.info("配置热更新监听器已启动")
# 不需要数据库(简化版本)
# 获取 DLL 路径
@@ -340,6 +393,26 @@ class BotService:
return True
def _on_config_update(self, new_config: dict):
    """Apply a reloaded configuration to the running service.

    Registered as the ``ConfigWatcher`` callback. Replaces the cached
    ``Queue`` and ``Concurrency`` sections (keeping the current one when a
    section is absent from ``new_config``) and, if a circuit breaker exists
    and ``circuit_breaker_threshold`` differs from the previous config,
    pushes the new value into the breaker's ``failure_threshold``.

    Args:
        new_config: The full, freshly parsed configuration mapping.
    """
    logger.info("正在应用新配置...")

    # Keep the outgoing concurrency section so the threshold can be compared.
    previous_concurrency = self.concurrency_config

    self.queue_config = new_config.get("Queue", self.queue_config)
    self.concurrency_config = new_config.get("Concurrency", previous_concurrency)

    breaker = self.circuit_breaker
    if breaker:
        threshold_key = "circuit_breaker_threshold"
        updated_threshold = self.concurrency_config.get(threshold_key, 10)
        previous_threshold = previous_concurrency.get(threshold_key, 10)
        # Only touch the breaker when the configured value actually changed.
        if updated_threshold != previous_threshold:
            breaker.failure_threshold = updated_threshold
            logger.info(f"熔断器阈值已更新: {updated_threshold}")

    logger.success("配置热更新完成")
async def run(self):
"""运行机器人"""
if not await self.initialize():
@@ -347,6 +420,15 @@ class BotService:
self.is_running = True
# 启动定期性能报告
async def periodic_stats():
while self.is_running:
await asyncio.sleep(300) # 每5分钟输出一次
if self.performance_monitor and self.is_running:
self.performance_monitor.print_stats()
stats_task = asyncio.create_task(periodic_stats())
try:
logger.info("机器人正在运行,按 Ctrl+C 停止...")
while self.is_running:
@@ -354,44 +436,96 @@ class BotService:
except KeyboardInterrupt:
logger.info("收到停止信号...")
finally:
stats_task.cancel()
await self.stop()
async def stop(self):
    """Shut the bot down gracefully, in seven logged steps.

    1. Stop accepting new messages (clears ``is_running``).
    2. Wait up to 30 seconds for queued messages to be processed, then
       discard whatever is left.
    3. Cancel the consumer tasks and wait for them to finish.
    4. Stop the config watcher.
    5. Unload plugins.
    6. Shut down the scheduler if it is running.
    7. Remove callbacks and destroy the WeChat connection.

    A final performance report is printed when a monitor exists. The method
    is idempotent: a second call returns immediately. Failures in steps
    4-7 are logged and do not prevent the remaining steps from running, so
    the WeChat connection is still released if, for example, a plugin
    fails to unload.
    """
    # Re-entrancy guard: only the first caller performs the shutdown.
    if self.is_shutting_down:
        return
    self.is_shutting_down = True

    logger.info("=" * 60)
    logger.info("正在优雅关闭机器人...")
    logger.info("=" * 60)

    # 1. Stop accepting new messages.
    self.is_running = False
    logger.info("[1/7] 停止接收新消息")

    # 2. Let queued messages finish, bounded by a timeout.
    if self.message_queue is not None and not self.message_queue.empty():
        queue_size = self.message_queue.qsize()
        logger.info(f"[2/7] 等待队列中 {queue_size} 条消息处理完成...")
        try:
            # join() returns once every queued message has been marked
            # done by a consumer; the timeout bounds the wait otherwise.
            await asyncio.wait_for(
                self.message_queue.join(),
                timeout=30
            )
            logger.info("[2/7] 队列消息已全部处理完成")
        except asyncio.TimeoutError:
            logger.warning("[2/7] 队列消息未在 30 秒内处理完成,强制清空")
            # Discard whatever is still queued.
            while not self.message_queue.empty():
                try:
                    self.message_queue.get_nowait()
                    self.message_queue.task_done()
                except Exception as e:
                    logger.debug(f"[2/7] 清空队列时中止: {e}")
                    break
    else:
        logger.info("[2/7] 队列为空,无需等待")

    # 3. Stop the consumers.
    if self.consumer_tasks:
        logger.info(f"[3/7] 停止 {len(self.consumer_tasks)}消息消费者...")
        for task in self.consumer_tasks:
            task.cancel()
        # return_exceptions=True: a cancelled consumer must not abort shutdown.
        await asyncio.gather(*self.consumer_tasks, return_exceptions=True)
        self.consumer_tasks.clear()
        logger.info("[3/7] 消息消费者已停止")
    else:
        logger.info("[3/7] 无消费者需要停止")

    # 4. Stop the config watcher.
    if self.config_watcher:
        logger.info("[4/7] 停止配置监听器...")
        try:
            await self.config_watcher.stop()
            logger.info("[4/7] 配置监听器已停止")
        except Exception as e:
            logger.error(f"[4/7] 停止配置监听器失败: {e}")
    else:
        logger.info("[4/7] 无配置监听器")

    # 5. Unload plugins.
    if self.plugin_manager:
        logger.info("[5/7] 卸载插件...")
        try:
            await self.plugin_manager.unload_plugins()
            logger.info("[5/7] 插件已卸载")
        except Exception as e:
            logger.error(f"[5/7] 卸载插件失败: {e}")
    else:
        logger.info("[5/7] 无插件需要卸载")

    # 6. Stop scheduled jobs.
    if scheduler.running:
        logger.info("[6/7] 停止定时任务...")
        try:
            scheduler.shutdown()
            logger.info("[6/7] 定时任务已停止")
        except Exception as e:
            logger.error(f"[6/7] 停止定时任务失败: {e}")
    else:
        logger.info("[6/7] 定时任务未运行")

    # 7. Remove callbacks and destroy the WeChat connection.
    logger.info("[7/7] 清理资源...")
    try:
        remove_callback_handler(self)
        clear_all_callbacks()
    except Exception as e:
        logger.error(f"[7/7] 清理回调失败: {e}")

    # Destroy the WeChat connection even if callback cleanup failed.
    if self.loader:
        try:
            self.loader.DestroyWeChat()
        except Exception as e:
            logger.error(f"[7/7] 销毁微信连接失败: {e}")

    # Print the final performance report.
    if self.performance_monitor:
        logger.info("最终性能报告:")
        self.performance_monitor.print_stats()

    logger.success("=" * 60)
    logger.success("机器人已优雅关闭")
    logger.success("=" * 60)
async def main():