# abot/plugins/ai_auto_response/group_memory.py

from __future__ import annotations

from collections import Counter
from typing import Dict, List

from db.message_storage import MessageStorageDB
from db.message_summary_db import MessageSummaryDBOperator


class GroupMemoryService:
    """Build a keyword-based "memory profile" for a chat group.

    The profile is derived from two text sources: the group's recent stored
    messages and its most recent stored summary.  Both are scanned for the
    keyword lists below using plain substring matching; the resulting hit
    counts drive the inferred domain, the focus topics and the style hints.
    """

    # Domain name -> keywords whose presence counts toward that domain.
    # Matching is by substring, so overlapping keywords (e.g. "claw" inside
    # "openclaw", "dota" inside "dota2") each score a hit on the same text.
    # "接口" is listed under both "robotics" and "tech"; text containing it
    # therefore scores for both domains and is counted twice as a topic.
    DOMAIN_KEYWORDS = {
        "openclaw": ["openclaw", "claw", "工作流", "节点", "编排", "接入", "联调"],
        "robotics": ["机器人", "bot", "微信机器人", "插件", "自动化", "消息路由", "部署", "接口"],
        "dota": ["dota", "dota2", "刀塔", "英雄", "出装", "对线", "团战", "版本"],
        "tech": ["python", "docker", "redis", "mysql", "服务器", "报错", "脚本", "网络", "接口"],
        "casual": ["吃饭", "睡觉", "上班", "下班", "周末", "唠嗑", "闲聊"],
    }
    # Keyword lists feeding the four style counters consumed by
    # _infer_style_profile (humor / sharpness / relaxed / serious).
    HUMOR_KEYWORDS = ["哈哈", "笑死", "乐", "蚌", "绷不住", "离谱", "逆天", "节目效果", "抽象", "乐子"]
    SHARPNESS_KEYWORDS = ["菜", "蠢", "逆天", "离谱", "抽象", "别搞", "别整", "你这", "搁这", "典"]
    RELAXED_KEYWORDS = ["随便", "行吧", "都行", "慢慢来", "不急", "摸鱼", "唠", "水群", "先这样"]
    SERIOUS_KEYWORDS = ["报错", "排查", "日志", "配置", "部署", "接口", "重现", "修复", "方案", "联调"]

    # Every keyword hit found in the stored summary counts this many times,
    # whereas a hit in a single raw message counts once.
    _SUMMARY_WEIGHT = 2

    def __init__(self, db_manager, config: Dict):
        """Create the service.

        Args:
            db_manager: Handle passed straight through to the two DB operator
                classes; its type is not visible from this file.
            config: Plugin configuration; a falsy value is replaced by ``{}``.
        """
        self.config = config or {}
        self.message_db = MessageStorageDB(db_manager)
        self.summary_db = MessageSummaryDBOperator(db_manager)

    def build_group_memory_profile(self, room_id: str, group_name: str = "") -> Dict:
        """Compute the memory profile for one group.

        Args:
            room_id: Identifier of the chat room to profile.
            group_name: Display name, echoed back unchanged in the result.

        Returns:
            A dict with the keys ``room_id``, ``group_name``,
            ``inferred_domain`` (the domain with the most keyword hits, or
            ``"general"`` when nothing matched), ``focus_topics`` (up to six
            most frequent keywords), ``message_sample_count`` (number of
            messages returned by storage, including ones with empty content),
            ``summary_text`` and ``style_profile``.
        """
        # NOTE(review): the exact meaning of the window/limit arguments is
        # defined by MessageStorageDB, which is not visible here.
        recent_messages = self.message_db.get_messages_for_summary(
            room_id, hours_ago=48, min_messages=20, max_hours=168, max_results=300
        ) or []
        summary_text = self._load_recent_summary_text(room_id)

        domain_counter: Counter = Counter()
        topic_counter: Counter = Counter()
        style_hits = {
            "humor_hits": 0,
            "sharpness_hits": 0,
            "relaxed_hits": 0,
            "serious_hits": 0,
        }
        short_message_count = 0
        message_count = 0

        for item in recent_messages:
            content = str(item.get("content", "") or "").lower()
            if not content:
                # Empty / missing content contributes nothing, not even to
                # the short-message ratio.
                continue
            message_count += 1
            if len(content) <= 8:
                short_message_count += 1
            self._accumulate_text_signals(content, 1, domain_counter, topic_counter, style_hits)

        # The summary is scanned after the messages so that counter insertion
        # order (which breaks ties in most_common) is messages first.
        self._accumulate_text_signals(
            summary_text.lower(), self._SUMMARY_WEIGHT, domain_counter, topic_counter, style_hits
        )

        inferred_domain = domain_counter.most_common(1)[0][0] if domain_counter else "general"
        focus_topics = [keyword for keyword, _ in topic_counter.most_common(6)]
        style_profile = self._infer_style_profile(
            short_message_ratio=(short_message_count / message_count) if message_count else 0.0,
            **style_hits,
        )
        return {
            "room_id": room_id,
            "group_name": group_name,
            "inferred_domain": inferred_domain,
            "focus_topics": focus_topics,
            "message_sample_count": len(recent_messages),
            "summary_text": summary_text,
            "style_profile": style_profile,
        }

    def _accumulate_text_signals(
        self,
        text: str,
        weight: int,
        domain_counter: Counter,
        topic_counter: Counter,
        style_hits: Dict[str, int],
    ) -> None:
        """Add the keyword hits found in one lower-cased text to the running tallies.

        Args:
            text: Already lower-cased text to scan.
            weight: Amount each individual keyword hit is worth.
            domain_counter: Per-domain hit totals, updated in place.
            topic_counter: Per-keyword hit totals, updated in place.
            style_hits: Dict with the keys ``humor_hits``, ``sharpness_hits``,
                ``relaxed_hits`` and ``serious_hits``, updated in place.
        """
        for domain, keywords in self.DOMAIN_KEYWORDS.items():
            matched = [keyword for keyword in keywords if keyword and keyword.lower() in text]
            if not matched:
                continue
            domain_counter[domain] += len(matched) * weight
            for keyword in matched:
                topic_counter[keyword] += weight

        style_hits["humor_hits"] += self._count_hits(text, self.HUMOR_KEYWORDS) * weight
        style_hits["sharpness_hits"] += self._count_hits(text, self.SHARPNESS_KEYWORDS) * weight
        style_hits["relaxed_hits"] += self._count_hits(text, self.RELAXED_KEYWORDS) * weight
        style_hits["serious_hits"] += self._count_hits(text, self.SERIOUS_KEYWORDS) * weight

    @staticmethod
    def _count_hits(text: str, keywords: List[str]) -> int:
        """Return how many distinct entries of *keywords* occur as substrings of *text*.

        Each keyword counts at most once regardless of how often it repeats;
        empty keywords are ignored.  *text* is expected to be lower-cased by
        the caller, since only the keyword side is lower-cased here.
        """
        return sum(1 for keyword in keywords if keyword and keyword.lower() in text)

    @staticmethod
    def _infer_style_profile(
        *,
        humor_hits: int,
        sharpness_hits: int,
        relaxed_hits: int,
        serious_hits: int,
        short_message_ratio: float,
    ) -> Dict:
        """Translate raw hit counts into human-readable style hints.

        Args:
            humor_hits: Weighted count of humor keyword hits.
            sharpness_hits: Weighted count of sharp/teasing keyword hits.
            relaxed_hits: Weighted count of relaxed-tone keyword hits.
            serious_hits: Weighted count of serious/troubleshooting keyword hits.
            short_message_ratio: Fraction (0.0-1.0) of non-empty messages that
                are at most 8 characters long.

        Returns:
            A dict with the keys ``interaction_tone``, ``humor_style``,
            ``sharpness_style`` and ``expressiveness_style``; every value is
            one of the fixed Chinese phrases below.  The numeric thresholds
            are hand-tuned heuristics.
        """
        humor_style = "轻微"
        if humor_hits >= 18:
            humor_style = "中等偏上，能接梗"
        elif humor_hits >= 8:
            humor_style = "中等，可以带一点冷幽默"

        sharpness_style = "轻微嘴硬，不刻薄"
        if sharpness_hits >= 15:
            sharpness_style = "允许轻微毒舌，但别上头"
        elif sharpness_hits >= 7:
            sharpness_style = "允许轻微嘴欠，但别刺人"

        # "Serious" needs both an absolute floor (10) and a clear lead (+4)
        # over relaxed; "relaxed" only needs the +4 lead over serious.
        interaction_tone = "自然群友感"
        if serious_hits >= max(relaxed_hits + 4, 10):
            interaction_tone = "偏认真，问题导向"
        elif relaxed_hits >= serious_hits + 4:
            interaction_tone = "偏松弛，像熟人闲聊"

        # A chat dominated by short messages reads as casual even when the
        # keyword counts alone would not say so.
        expressiveness_style = "克制"
        if short_message_ratio >= 0.58 or relaxed_hits >= serious_hits + 4:
            expressiveness_style = "松弛一点，像随口接话"
        elif serious_hits >= 12:
            expressiveness_style = "短句，偏干货"

        return {
            "interaction_tone": interaction_tone,
            "humor_style": humor_style,
            "sharpness_style": sharpness_style,
            "expressiveness_style": expressiveness_style,
        }

    def _load_recent_summary_text(self, room_id: str) -> str:
        """Return the text of the newest ``daily`` or ``manual`` summary for a room.

        The latest row of each summary type is fetched separately and the
        newer of the two (by ``period_end``, then ``update_time``) wins.

        Args:
            room_id: Value matched against the ``chatroom_id`` column.

        Returns:
            The stripped ``summary_text`` of the winning row, or ``""`` when
            no summary exists.
        """
        sql = """
            SELECT *
            FROM t_message_summary
            WHERE chatroom_id = %s AND summary_type = %s
            ORDER BY period_end DESC, update_time DESC
            LIMIT 1
            """
        candidates: List[Dict] = []
        for summary_type in ("daily", "manual"):
            candidates.extend(self.summary_db.execute_query(sql, (room_id, summary_type)) or [])
        if not candidates:
            return ""

        def recency_key(row: Dict):
            # Missing/NULL timestamps must rank as *oldest*.  Stringifying
            # None directly would give "None", which sorts above any
            # digit-leading timestamp and would let an undated row win.
            # NOTE(review): assumes str() of the stored timestamps orders
            # chronologically (e.g. "YYYY-MM-DD HH:MM:SS") — confirm.
            return (str(row.get("period_end") or ""), str(row.get("update_time") or ""))

        # max() returns the first maximal row, matching the tie-breaking of a
        # stable descending sort (the "daily" row is preferred on a tie).
        newest = max(candidates, key=recency_key)
        return str(newest.get("summary_text", "") or "").strip()