151 lines
6.7 KiB
Python
151 lines
6.7 KiB
Python
from __future__ import annotations
|
|
|
|
from collections import Counter
|
|
from typing import Dict, List
|
|
|
|
from db.message_storage import MessageStorageDB
|
|
from db.message_summary_db import MessageSummaryDBOperator
|
|
|
|
|
|
class GroupMemoryService:
    """Derive a keyword-based "memory profile" for a chat group.

    The profile is computed from two inputs: the recent messages returned by
    ``MessageStorageDB.get_messages_for_summary`` and the newest stored row
    of ``t_message_summary`` for the room. Both texts are lower-cased and
    scanned with plain substring matching against the keyword tables below.
    """

    # Domain name -> keywords whose presence votes for that domain.
    # A keyword may be listed under several domains (e.g. "接口").
    DOMAIN_KEYWORDS = {
        "openclaw": ["openclaw", "claw", "工作流", "节点", "编排", "接入", "联调"],
        "robotics": ["机器人", "bot", "微信机器人", "插件", "自动化", "消息路由", "部署", "接口"],
        "dota": ["dota", "dota2", "刀塔", "英雄", "出装", "对线", "团战", "版本"],
        "tech": ["python", "docker", "redis", "mysql", "服务器", "报错", "脚本", "网络", "接口"],
        "casual": ["吃饭", "睡觉", "上班", "下班", "周末", "唠嗑", "闲聊"],
    }
    HUMOR_KEYWORDS = ["哈哈", "笑死", "乐", "蚌", "绷不住", "离谱", "逆天", "节目效果", "抽象", "乐子"]
    SHARPNESS_KEYWORDS = ["菜", "蠢", "逆天", "离谱", "抽象", "别搞", "别整", "你这", "搁这", "典"]
    RELAXED_KEYWORDS = ["随便", "行吧", "都行", "慢慢来", "不急", "摸鱼", "唠", "水群", "先这样"]
    SERIOUS_KEYWORDS = ["报错", "排查", "日志", "配置", "部署", "接口", "重现", "修复", "方案", "联调"]

    # Every hit found in the stored summary counts this many times.
    SUMMARY_WEIGHT = 2
    # Maximum number of keywords reported in ``focus_topics``.
    FOCUS_TOPIC_LIMIT = 6
    # Messages of at most this many characters are treated as "short".
    SHORT_MESSAGE_MAX_LEN = 8

    def __init__(self, db_manager, config: Dict):
        """Store the config and create the two DB accessors.

        Args:
            db_manager: Handle passed unchanged to ``MessageStorageDB`` and
                ``MessageSummaryDBOperator``.
            config: Service configuration; a falsy value is replaced by an
                empty dict.
        """
        self.config = config or {}
        self.message_db = MessageStorageDB(db_manager)
        self.summary_db = MessageSummaryDBOperator(db_manager)

    def build_group_memory_profile(self, room_id: str, group_name: str = "") -> Dict:
        """Build the memory profile for one chat room.

        Args:
            room_id: Room identifier used for both the message query and
                the summary lookup.
            group_name: Display name, echoed back unchanged in the result.

        Returns:
            A dict with the keys ``room_id``, ``group_name``,
            ``inferred_domain`` (``"general"`` when no domain keyword
            matched), ``focus_topics``, ``message_sample_count`` (number of
            rows returned by the message query, including rows with empty
            content), ``summary_text`` and ``style_profile``.
        """
        # NOTE(review): the window arguments are passed through as-is; their
        # exact semantics are defined by MessageStorageDB -- confirm there.
        recent_messages = self.message_db.get_messages_for_summary(
            room_id, hours_ago=48, min_messages=20, max_hours=168, max_results=300
        ) or []
        summary_text = self._load_recent_summary_text(room_id)

        topic_counter: Counter = Counter()
        domain_counter: Counter = Counter()
        style_counter: Counter = Counter()
        short_message_count = 0
        message_count = 0

        for item in recent_messages:
            content = str(item.get("content", "") or "").lower()
            if not content:
                continue
            message_count += 1
            if len(content) <= self.SHORT_MESSAGE_MAX_LEN:
                short_message_count += 1
            self._scan_text(content, 1, domain_counter, topic_counter, style_counter)

        # The summary condenses many messages, so its hits are weighted up.
        self._scan_text(
            summary_text.lower(),
            self.SUMMARY_WEIGHT,
            domain_counter,
            topic_counter,
            style_counter,
        )

        inferred_domain = domain_counter.most_common(1)[0][0] if domain_counter else "general"
        focus_topics = [topic for topic, _ in topic_counter.most_common(self.FOCUS_TOPIC_LIMIT)]
        short_message_ratio = (short_message_count / message_count) if message_count else 0.0
        style_profile = self._infer_style_profile(
            humor_hits=style_counter["humor"],
            sharpness_hits=style_counter["sharpness"],
            relaxed_hits=style_counter["relaxed"],
            serious_hits=style_counter["serious"],
            short_message_ratio=short_message_ratio,
        )
        return {
            "room_id": room_id,
            "group_name": group_name,
            "inferred_domain": inferred_domain,
            "focus_topics": focus_topics,
            "message_sample_count": len(recent_messages),
            "summary_text": summary_text,
            "style_profile": style_profile,
        }

    def _scan_text(
        self,
        text: str,
        weight: int,
        domain_counter: Counter,
        topic_counter: Counter,
        style_counter: Counter,
    ) -> None:
        """Add the keyword hits of one lower-cased text to the counters.

        Each matching keyword votes for every domain that lists it, but is
        added to ``topic_counter`` only once per text, so a keyword shared
        by several domains is not over-represented in the focus topics.
        """
        counted_topics = set()
        for domain, keywords in self.DOMAIN_KEYWORDS.items():
            matched = [kw for kw in keywords if kw and kw.lower() in text]
            if not matched:
                continue
            domain_counter[domain] += len(matched) * weight
            for kw in matched:
                if kw in counted_topics:
                    continue
                counted_topics.add(kw)
                topic_counter[kw] += weight

        style_counter["humor"] += self._count_hits(text, self.HUMOR_KEYWORDS) * weight
        style_counter["sharpness"] += self._count_hits(text, self.SHARPNESS_KEYWORDS) * weight
        style_counter["relaxed"] += self._count_hits(text, self.RELAXED_KEYWORDS) * weight
        style_counter["serious"] += self._count_hits(text, self.SERIOUS_KEYWORDS) * weight

    @staticmethod
    def _count_hits(text: str, keywords: List[str]) -> int:
        """Return how many non-empty keywords occur in ``text`` as substrings.

        Keywords are lower-cased before matching; ``text`` is used as given.
        Each keyword counts at most once regardless of repetitions.
        """
        return sum(1 for keyword in keywords if keyword and keyword.lower() in text)

    @staticmethod
    def _infer_style_profile(
        *,
        humor_hits: int,
        sharpness_hits: int,
        relaxed_hits: int,
        serious_hits: int,
        short_message_ratio: float,
    ) -> Dict:
        """Map raw hit counts onto descriptive style labels.

        Args:
            humor_hits: Weighted count of humor keyword hits.
            sharpness_hits: Weighted count of sharp-tongue keyword hits.
            relaxed_hits: Weighted count of relaxed keyword hits.
            serious_hits: Weighted count of serious keyword hits.
            short_message_ratio: Share of non-empty messages classified as
                short, in the range 0.0-1.0.

        Returns:
            A dict with the keys ``interaction_tone``, ``humor_style``,
            ``sharpness_style`` and ``expressiveness_style``.
        """
        if humor_hits >= 18:
            humor_style = "中等偏上,能接梗"
        elif humor_hits >= 8:
            humor_style = "中等,可以带一点冷幽默"
        else:
            humor_style = "轻微"

        if sharpness_hits >= 15:
            sharpness_style = "允许轻微毒舌,但别上头"
        elif sharpness_hits >= 7:
            sharpness_style = "允许轻微嘴欠,但别刺人"
        else:
            sharpness_style = "轻微嘴硬,不刻薄"

        # "Clearly relaxed" means relaxed hits lead serious hits by 4 or more.
        clearly_relaxed = relaxed_hits >= serious_hits + 4

        if serious_hits >= max(relaxed_hits + 4, 10):
            interaction_tone = "偏认真,问题导向"
        elif clearly_relaxed:
            interaction_tone = "偏松弛,像熟人闲聊"
        else:
            interaction_tone = "自然群友感"

        if short_message_ratio >= 0.58 or clearly_relaxed:
            expressiveness_style = "松弛一点,像随口接话"
        elif serious_hits >= 12:
            expressiveness_style = "短句,偏干货"
        else:
            expressiveness_style = "克制"

        return {
            "interaction_tone": interaction_tone,
            "humor_style": humor_style,
            "sharpness_style": sharpness_style,
            "expressiveness_style": expressiveness_style,
        }

    def _load_recent_summary_text(self, room_id: str) -> str:
        """Return the text of the newest "daily" or "manual" summary.

        The newest row of each summary type is fetched, then the more recent
        of the two (by ``period_end``, then ``update_time``) is chosen. When
        both rows compare equal, the "daily" row wins because it is fetched
        first.

        Returns:
            The stripped ``summary_text`` of the chosen row, or ``""`` when
            the room has no stored summary.
        """
        sql = """
            SELECT *
            FROM t_message_summary
            WHERE chatroom_id = %s AND summary_type = %s
            ORDER BY period_end DESC, update_time DESC
            LIMIT 1
        """
        candidates: List[Dict] = []
        for summary_type in ("daily", "manual"):
            rows = self.summary_db.execute_query(sql, (room_id, summary_type)) or []
            candidates.extend(rows)
        if not candidates:
            return ""

        def recency_key(row: Dict):
            # A NULL column must sort as "oldest"; str(None) == "None" would
            # otherwise compare greater than any "20xx-..." timestamp.
            return (
                str(row.get("period_end") or ""),
                str(row.get("update_time") or ""),
            )

        # max() keeps the first of equal keys, matching a stable
        # descending sort followed by taking element 0.
        newest = max(candidates, key=recency_key)
        return str(newest.get("summary_text", "") or "").strip()
|