# Listing info: 252 lines, 11 KiB, Python
import re
|
||
from datetime import datetime, time, timedelta
|
||
import toml
|
||
import os
|
||
|
||
|
||
class InterventionBot:
    """Rule-based detector that decides whether a bot should join a group chat.

    Keyword lists, the morning sign-in window and the reply-rate thresholds are
    read from an optional TOML file; built-in defaults are used for anything
    the file does not provide.
    """

    # Text layout tried first when a timestamp arrives as a string.
    _TIMESTAMP_FORMAT = "%Y-%m-%d %H:%M:%S"

    def __init__(self, config_path=None):
        """Load the configuration and derive keyword lists and thresholds.

        Args:
            config_path: Optional path to a TOML file. It is only read when it
                is truthy and exists on disk; otherwise defaults apply.
        """
        # Load configuration.
        self.config = {}
        if config_path and os.path.exists(config_path):
            self.config = toml.load(config_path)

        # Sections holding keywords and thresholds.
        keywords = self.config.get("Keywords", {})
        time_window = self.config.get("TimeWindow", {})
        reply_threshold = self.config.get("ReplyThreshold", {})

        # Emoji tokens.
        self.emojis = keywords.get(
            "emojis",
            ["[捂脸]", "[奸笑]", "[可怜]", "[擦汗]", "[发呆]", "[抠鼻]", "[破涕为笑]", "[旺柴]"],
        )
        # Topic keywords.
        self.hot_topics = keywords.get(
            "hot_topics",
            ["咖啡", "手机", "小米", "华为", "苹果", "价格", "流畅", "螺蛳粉", "外卖"],
        )
        self.fish_keywords = keywords.get(
            "fish_keywords",
            ["鱼缸", "鱼便", "红边", "造浪", "养鱼", "进货", "鳑鲏", "吸鳅"],
        )
        self.tech_keywords = keywords.get(
            "tech_keywords",
            ["MIUI", "鸿蒙", "iPhone", "安卓", "推送", "充电", "屏幕", "电池"],
        )
        self.mechanism_keywords = keywords.get(
            "mechanism_keywords",
            ["积分", "AI ", "功能列表", "黑丝", "打劫", "指令"],
        )
        self.news_keywords = keywords.get(
            "news_keywords", ["新闻", "骨灰房", "法院", "判决", "住建局"]
        )

        # Morning sign-in window, stored as a (start, end) pair of time objects.
        self.morning_window = (
            time(
                time_window.get("morning_start_hour", 8),
                time_window.get("morning_start_minute", 0),
            ),
            time(
                time_window.get("morning_end_hour", 8),
                time_window.get("morning_end_minute", 30),
            ),
        )

        # Reply-rate thresholds.
        self.messages_per_minute_threshold = reply_threshold.get(
            "messages_per_minute_threshold", 3
        )
        self.analysis_window_minutes = reply_threshold.get("analysis_window_minutes", 5)

    @classmethod
    def _parse_timestamp(cls, timestamp):
        """Convert a timestamp into a naive ``datetime``, or ``None`` if unusable.

        Accepts Unix epoch numbers (int or float, converted to local time),
        strings in ``YYYY-MM-DD HH:MM:SS`` layout, and strings holding an epoch
        number. Unsupported types and unparseable strings yield ``None``.
        Out-of-range numeric values propagate the error raised by
        ``datetime.fromtimestamp`` so callers can decide how to report it.
        """
        # bool is a subclass of int but is never a meaningful timestamp.
        if isinstance(timestamp, bool):
            return None
        if isinstance(timestamp, (int, float)):
            return datetime.fromtimestamp(timestamp)
        if isinstance(timestamp, str):
            try:
                return datetime.strptime(timestamp, cls._TIMESTAMP_FORMAT)
            except ValueError:
                pass
            try:
                return datetime.fromtimestamp(float(timestamp))
            except (ValueError, OverflowError, OSError):
                return None
        return None

    def is_morning_window(self, timestamp):
        """Return True when ``timestamp`` falls inside the morning sign-in window.

        Both window bounds are inclusive. A window whose start is later than
        its end is treated as crossing midnight. Unparseable timestamps give
        False.
        """
        try:
            moment = self._parse_timestamp(timestamp)
            if moment is None:
                return False
            start, end = self.morning_window
            clock = moment.time()
            if start <= end:
                return start <= clock <= end
            # Window wraps past midnight, e.g. 23:00 -> 01:00.
            return clock >= start or clock <= end
        except Exception as e:
            print(f"[早晨窗口检测] 错误: {e}")
            return False

    def detect_topic(self, message):
        """Classify ``message`` by the first keyword group that matches.

        Matching is case-insensitive substring search. Groups are checked in
        the order fish, tech, mechanism, news, hot_topic.

        Returns:
            The topic label, or None when nothing matches or ``message`` is
            not a string.
        """
        if not isinstance(message, str):
            return None
        message_lower = message.lower()
        groups = (
            ("fish", self.fish_keywords),
            ("tech", self.tech_keywords),
            ("mechanism", self.mechanism_keywords),
            ("news", self.news_keywords),
            ("hot_topic", self.hot_topics),
        )
        for label, words in groups:
            # Keywords are lower-cased too; otherwise mixed-case entries such
            # as "MIUI" could never match the lower-cased message.
            if any(word.lower() in message_lower for word in words):
                return label
        return None

    def rule_morning_signin(self, timestamp, messages):
        """Rule 1: morning sign-in.

        True when ``timestamp`` is inside the morning window and one of the
        last five messages contains "签到" or "早". Non-string messages are
        ignored.
        """
        if not self.is_morning_window(timestamp):
            return False
        return any(
            isinstance(msg, str) and ("签到" in msg or "早" in msg)
            for msg in messages[-5:]
        )

    def rule_hot_topic(self, message, messages):
        """Rule 2: hot-topic participation.

        True when ``message`` is a hot topic and at least three of the last
        five messages are hot topics as well.
        """
        if self.detect_topic(message) != "hot_topic":
            return False
        hot_count = sum(1 for m in messages[-5:] if self.detect_topic(m) == "hot_topic")
        return hot_count >= 3

    def rule_tech_discussion(self, message, messages):
        """Rule 3: technical discussion (``messages`` is unused)."""
        return self.detect_topic(message) == "tech"

    def rule_fish_discussion(self, message, messages):
        """Rule 4: fish-keeping topic (``messages`` is unused)."""
        return self.detect_topic(message) == "fish"

    def rule_mechanism_interaction(self, message, messages):
        """Rule 5: interaction with in-group mechanisms (``messages`` is unused)."""
        return self.detect_topic(message) == "mechanism"

    def rule_humor_tease(self, message, messages):
        """Rule 6: humor and teasing.

        True when ``message`` contains a known emoji token or "哈哈", or when
        at least two of the last five messages contain an emoji token.
        Non-string messages never match.
        """
        if isinstance(message, str):
            if any(emoji in message for emoji in self.emojis) or "哈哈" in message:
                return True
        emoji_messages = sum(
            1
            for m in messages[-5:]
            if isinstance(m, str) and any(e in m for e in self.emojis)
        )
        return emoji_messages >= 2

    def rule_news_reaction(self, message, messages):
        """Rule 7: reaction to odd or social news (``messages`` is unused)."""
        return self.detect_topic(message) == "news"

    def rule_high_reply_rate(self, timestamp, chat_log):
        """Rule 8: high reply rate (messages per minute reaches the threshold).

        Counts ``chat_log`` entries whose timestamp lies in the inclusive range
        ``[timestamp - analysis_window_minutes, timestamp]`` and compares the
        per-minute average with ``messages_per_minute_threshold``.

        Args:
            timestamp: Time of the current message; when it cannot be parsed
                the current wall-clock time is used instead.
            chat_log: Iterable of dict-like entries with a "timestamp" key.
                Entries that are malformed or lack a usable timestamp are
                skipped.

        Returns:
            True when the rate reaches the threshold, otherwise False. Any
            unexpected error is printed and reported as False.
        """
        try:
            current_time = self._parse_timestamp(timestamp)
            if current_time is None:
                # Unusable timestamp: fall back to the current time.
                current_time = datetime.now()

            window_start = current_time - timedelta(minutes=self.analysis_window_minutes)

            # Count messages that fall inside the analysis window.
            recent_count = 0
            for msg in chat_log:
                try:
                    msg_time = self._parse_timestamp(msg.get("timestamp"))
                except (AttributeError, TypeError, ValueError, OverflowError, OSError):
                    # A single malformed entry must not abort the whole check.
                    continue
                if msg_time is not None and window_start <= msg_time <= current_time:
                    recent_count += 1

            # Too few messages: do not trigger.
            if recent_count < self.messages_per_minute_threshold:
                return False

            messages_per_minute = recent_count / self.analysis_window_minutes
            triggered = messages_per_minute >= self.messages_per_minute_threshold

            # Log the rate to ease debugging.
            if triggered:
                print(f"[高频率检测] 当前消息频率: {messages_per_minute:.2f}/分钟,阈值: {self.messages_per_minute_threshold}/分钟")

            return triggered
        except Exception as e:
            print(f"[高频率检测] 错误: {e}")
            return False

    def should_intervene(self, timestamp, message, messages, chat_log):
        """Return True as soon as any rule fires.

        Rules run in a fixed order: morning sign-in, the six content rules,
        then the reply-rate rule.
        """
        if self.rule_morning_signin(timestamp, messages):
            return True

        content_rules = (
            self.rule_hot_topic,
            self.rule_tech_discussion,
            self.rule_fish_discussion,
            self.rule_mechanism_interaction,
            self.rule_humor_tease,
            self.rule_news_reaction,
        )
        if any(rule(message, messages) for rule in content_rules):
            return True

        return bool(self.rule_high_reply_rate(timestamp, chat_log))

    def process_message(self, timestamp, message, messages, chat_log):
        """Process a single message and return its intervention flag as a bool."""
        return bool(self.should_intervene(timestamp, message, messages, chat_log))

    def process_chat_log(self, chat_log):
        """Evaluate every entry of ``chat_log`` and return the intervention flags.

        Each entry must provide "timestamp" and "message" keys. The rules see
        only the messages up to and including the entry being evaluated.

        Returns:
            A list of dicts with "timestamp", "message" and "intervention"
            keys, one per input entry and in the same order.
        """
        messages = [line["message"] for line in chat_log]
        results = []

        for i, line in enumerate(chat_log):
            timestamp = line["timestamp"]
            message = line["message"]
            intervention = self.process_message(timestamp, message, messages[:i + 1], chat_log)
            results.append({
                "timestamp": timestamp,
                "message": message,
                "intervention": intervention,
            })

        return results
|
||
|
||
|
||
# Example usage
if __name__ == "__main__":
    # Simulated chat history, written as (timestamp, user_id, message) rows
    # and expanded into the dict records the bot consumes.
    record_fields = ("timestamp", "user_id", "message")
    sample_rows = [
        ("2025-03-14 08:06:38", "Jyunere", "签到"),
        ("2025-03-14 08:06:54", "Jyunere", "啥情况?卷了?"),
        ("2025-03-14 08:07:20", "wxid_qx4z0jq3rp3122", "那你喝咖啡就好了"),
        ("2025-03-14 09:12:28", "Jyunere", "我同事的鸿蒙确实流畅。"),
        ("2025-03-14 09:35:21", "Jyunere", "垃圾MIUI"),
        ("2025-05-21 14:31:57", "wxid_4re8ddo26dxb52", "年轻人随随便便就能深蹲200"),
        ("2025-05-21 14:32:20", "liu79830956", "@水牛 过分了啊,报错还扣积分 赔我200"),
        ("2025-05-21 14:32:39", "Jyunere", "哈哈,识别到指令了。"),
        ("2025-05-21 14:32:42", "wxid_z8uo70zywfpn12", "检测到天 气了"),
        ("2025-05-21 14:35:08", "liu79830956", "这螺蛳粉估计要明天也吃不上了[旺柴]"),
    ]
    sample_chat_log = [dict(zip(record_fields, row)) for row in sample_rows]

    bot = InterventionBot()
    results = bot.process_chat_log(sample_chat_log)

    # Print the verdict for each message, separated by a rule line.
    separator = "-" * 50
    for result in results:
        print(f"[{result['timestamp']}] Message: {result['message']}")
        print(f"Intervention: {result['intervention']}")
        print(separator)