# abot/plugins/ai_auto_response/context_builder.py

from __future__ import annotations

import re
from typing import Dict, List


class ContextBuilder:
    def __init__(self, recent_context_size: int = 30):
        self.recent_context_size = max(int(recent_context_size or 30), 1)

    def build(
        self,
        *,
        room_id: str,
        group_profile: Dict,
        sender: str,
        sender_name: str,
        content: str,
        recent_messages: List[Dict],
        member_context: Dict,
        trigger: Dict,
        flow_state: str,
        reply_mode: str,
        vector_memories: List[Dict],
        quote_context: Dict | None = None,
        image_context: Dict | None = None,
    ) -> Dict:
        selected_messages = self._select_recent_messages(recent_messages, sender, content, quote_context or {})
        recent_lines = []
        for item in selected_messages:
            msg_sender = item.get("sender_name") or item.get("sender") or "未知成员"
            msg_content = item.get("content") or item.get("message") or ""
            if msg_content:
                recent_lines.append(f"{msg_sender}: {msg_content}")
        return {
            "group_profile": group_profile or {"room_id": room_id},
            "speaker_profile": {
                "wxid": sender,
                "display_name": sender_name,
                "member_context": member_context or {},
            },
            "speaker_name_clean": self._clean_display_name(sender_name),
            "recent_message_items": self._build_recent_message_items(selected_messages),
            "recent_messages": recent_lines,
            "recent_summary": "",
            "trigger_type": trigger.get("trigger_type", "none"),
            "reply_mode": reply_mode,
            "flow_state": flow_state,
            "memory_prompt": self._build_member_memory_prompt(member_context),
            "vector_memory_prompt": self._build_vector_memory_prompt(vector_memories),
            "group_profile_prompt": self._build_group_profile_prompt(group_profile or {}),
            "quote_prompt": self._build_quote_prompt(quote_context or {}),
            "image_prompt": self._build_image_prompt(image_context or {}),
            "current_message": f"{sender_name}: {content}",
        }

    @staticmethod
    def _build_recent_message_items(messages: List[Dict]) -> List[Dict]:
        items: List[Dict] = []
        for idx, item in enumerate(messages, start=1):
            content = str(item.get("content") or item.get("message") or "").strip()
            if not content:
                continue
            items.append({
                "idx": idx,
                "sender": item.get("sender_name") or item.get("sender") or "未知成员",
                "content": content[:120],
                "is_at": bool(item.get("is_at")),
            })
        return items

    def _select_recent_messages(
        self,
        recent_messages: List[Dict],
        current_sender: str,
        current_content: str,
        quote_context: Dict,
    ) -> List[Dict]:
        if not recent_messages:
            return []
        window = recent_messages[-self.recent_context_size:]
        if len(window) <= 8:
            return window

        current_tokens = self._extract_topic_tokens(current_content)
        quote_tokens = self._extract_topic_tokens(
            f"{quote_context.get('title', '')} {quote_context.get('quote_body', '')}"
        )
        focus_tokens = current_tokens | quote_tokens
        quote_sender_name = str(quote_context.get("quote_sender_name", "") or "").strip().lower()

        scored: List[tuple[int, int, Dict]] = []
        for idx, item in enumerate(window):
            score = self._message_relevance(
                item,
                current_sender=current_sender,
                focus_tokens=focus_tokens,
                quote_sender_name=quote_sender_name,
            )
            if score > 0:
                scored.append((score, idx, item))

        # 总是保留尾部几条，维持现场感；再拼上与当前话题最相关的消息。
        tail_indexes = set(range(max(len(window) - 4, 0), len(window)))
        keep_indexes = set(tail_indexes)
        for _, idx, _ in sorted(scored, key=lambda x: (-x[0], -x[1]))[:10]:
            keep_indexes.add(idx)

        selected = [window[idx] for idx in sorted(keep_indexes)]
        if len(selected) < 6:
            return window[-6:]
        return selected[-12:]

    @classmethod
    def _message_relevance(
        cls,
        item: Dict,
        *,
        current_sender: str,
        focus_tokens: set[str],
        quote_sender_name: str,
    ) -> int:
        content = str(item.get("content") or item.get("message") or "").strip()
        if not content:
            return 0
        sender = str(item.get("sender", "") or "")
        sender_name = str(item.get("sender_name", "") or "").strip().lower()
        score = 0

        if sender == current_sender:
            score += 3
        if quote_sender_name and quote_sender_name in sender_name:
            score += 3
        if item.get("is_at"):
            score += 1

        if focus_tokens:
            tokens = cls._extract_topic_tokens(content)
            overlap = focus_tokens & tokens
            score += min(len(overlap) * 2, 6)
            if overlap and cls._looks_like_question_or_answer(content):
                score += 2
        elif sender == current_sender:
            score += 1

        if cls._looks_like_question_or_answer(content):
            score += 1
        return score

    @staticmethod
    def _looks_like_question_or_answer(content: str) -> bool:
        text = str(content or "").strip().lower()
        if not text:
            return False
        patterns = [
            r"\?$", r"？$", r"怎么", r"如何", r"为啥", r"为什么", r"能不能", r"可以吗",
            r"报错", r"试试", r"先", r"然后", r"配置", r"日志", r"接口", r"原因",
        ]
        return any(re.search(pattern, text, flags=re.IGNORECASE) for pattern in patterns)

    @staticmethod
    def _extract_topic_tokens(content: str) -> set[str]:
        text = str(content or "").lower()
        tokens = set(re.findall(r"[a-z0-9_\\-]{3,}", text))
        keywords = [
            "openclaw", "qdrant", "ollama", "docker", "python", "api", "插件", "机器人", "模型",
            "日志", "配置", "报错", "部署", "联网", "图片", "记忆", "群聊", "dota", "战绩",
        ]
        for keyword in keywords:
            if keyword in text:
                tokens.add(keyword)
        return tokens

    @staticmethod
    def _clean_display_name(sender_name: str) -> str:
        import re

        text = str(sender_name or "").strip()
        if not text:
            return ""
        text = re.sub(r"\s+", "", text)
        text = re.sub(r"[^\u4e00-\u9fffA-Za-z0-9_]", "", text)
        return text[:8]

    @staticmethod
    def _build_member_memory_prompt(member_context: Dict) -> str:
        if not member_context:
            return "暂无稳定成员画像。"
        meta = member_context.get("meta", {}) or {}
        topics = member_context.get("topics_of_interest", []) or []
        recent_focus = member_context.get("recent_focus", []) or []
        skills = ContextBuilder._stringify_items(meta.get("skill_profile", []), 5)
        stable_traits = ContextBuilder._stringify_items(meta.get("stable_traits", []), 4)
        habits = ContextBuilder._stringify_items(meta.get("habit_patterns", []), 4)
        reply_prefs = ContextBuilder._stringify_items(meta.get("long_term_reply_preferences", []), 4)
        recent_state = ContextBuilder._stringify_items(meta.get("recent_state", []), 4)
        reply_taboos = ContextBuilder._stringify_items(meta.get("reply_taboos", []), 3)
        lines = [
            f"成员摘要：{member_context.get('summary_text', '')}".strip(),
            f"互动风格：{member_context.get('interaction_style', '')}".strip(),
            f"回复偏好：{member_context.get('response_style_hint', '')}".strip(),
            f"长期主题：{', '.join(topics[:5])}" if topics else "",
            f"近期关注：{', '.join(recent_focus[:4])}" if recent_focus else "",
            f"技能侧重点：{skills}" if skills else "",
            f"稳定特征：{stable_traits}" if stable_traits else "",
            f"习惯模式：{habits}" if habits else "",
            f"长期回复偏好：{reply_prefs}" if reply_prefs else "",
            f"近期状态：{recent_state}" if recent_state else "",
            f"气质倾向：{meta.get('temperament_tendency', '')}".strip(),
            f"群内角色：{meta.get('group_role', '')}".strip(),
            f"回复禁忌：{reply_taboos}" if reply_taboos else "",
        ]
        return "\n".join([line for line in lines if line])

    @staticmethod
    def _stringify_items(items: List | str, limit: int) -> str:
        if isinstance(items, str):
            return items.strip()
        values: List[str] = []
        for item in items[:limit]:
            if isinstance(item, dict):
                value = str(
                    item.get("name")
                    or item.get("label")
                    or item.get("value")
                    or item.get("text")
                    or ""
                ).strip()
            else:
                value = str(item or "").strip()
            if value and value not in values:
                values.append(value)
        return ", ".join(values)

    @staticmethod
    def _build_vector_memory_prompt(vector_memories: List[Dict]) -> str:
        if not vector_memories:
            return ""
        lines = []
        for item in vector_memories[:2]:
            summary = item.get("content_summary") or item.get("summary_text") or item.get("text") or ""
            memory_type = item.get("memory_type", "memory")
            if summary:
                lines.append(f"[{memory_type}] {summary}")
        return "\n".join(lines)

    @staticmethod
    def _build_group_profile_prompt(group_profile: Dict) -> str:
        if not group_profile:
            return "当前群没有特殊知识域限制。"
        focus = ", ".join(group_profile.get("knowledge_focus", [])[:6])
        boundaries = ", ".join(group_profile.get("topic_boundaries", [])[:6])
        summary = str(group_profile.get("group_memory_summary", "") or "").replace("\n", " ").strip()
        if len(summary) > 120:
            summary = summary[:117] + "..."
        lines = [
            f"群模式：{group_profile.get('mode', 'social')}",
            f"知识域：{group_profile.get('knowledge_domain', 'general')}",
            f"配置知识域：{group_profile.get('configured_domain', 'general')}",
            f"历史推断知识域：{group_profile.get('group_memory_domain', 'general')}",
            f"回答风格：{group_profile.get('reply_style', '自然短句')}",
            f"互动调性：{group_profile.get('interaction_tone', '自然群友感')}",
            f"幽默强度：{group_profile.get('humor_style', '轻微')}",
            f"嘴硬程度：{group_profile.get('sharpness_style', '轻微嘴硬，不刻薄')}",
            f"表达松弛度：{group_profile.get('expressiveness_style', '克制')}",
            f"称呼强度：{group_profile.get('address_style', '低频称呼，默认直接接话')}",
            f"知识重点：{focus}" if focus else "",
            f"群长期摘要：{summary}" if summary else "",
            f"历史推断社交风格：{ContextBuilder._build_style_summary(group_profile.get('group_memory_style', {}))}"
            if group_profile.get("group_memory_style")
            else "",
            f"边界提醒：{boundaries}" if boundaries else "",
            f"人格叠加：{group_profile.get('persona_overlay', '')}".strip(),
        ]
        return "\n".join([line for line in lines if line])

    @staticmethod
    def _build_style_summary(style_profile: Dict) -> str:
        if not style_profile:
            return ""
        return " / ".join(
            [
                str(style_profile.get("interaction_tone", "") or "").strip(),
                str(style_profile.get("humor_style", "") or "").strip(),
                str(style_profile.get("sharpness_style", "") or "").strip(),
                str(style_profile.get("expressiveness_style", "") or "").strip(),
            ]
        ).strip(" /")

    @staticmethod
    def _build_quote_prompt(quote_context: Dict) -> str:
        if not quote_context:
            return ""
        quote_type = quote_context.get("quote_type_label", "引用消息")
        quote_sender = quote_context.get("quote_sender_name", "") or "未知成员"
        quote_body = quote_context.get("quote_body", "") or ""
        title = quote_context.get("title", "") or ""
        lines = [
            f"用户这次是在引用消息后发言。",
            f"引用类型：{quote_type}",
            f"被引用发送者：{quote_sender}",
            f"图片附件：已附带原图" if quote_context.get("has_image_attachment") else "",
            f"引用标题：{title}" if title else "",
            f"被引用内容：{quote_body}" if quote_body else "",
        ]
        return "\n".join([line for line in lines if line])

    @staticmethod
    def _build_image_prompt(image_context: Dict) -> str:
        if not image_context:
            return ""
        lines = [
            "已附带最近一张群图片作为上下文。",
            f"图片发送者：{image_context.get('sender_name', '未知成员')}",
            f"图片说明：{image_context.get('hint', '')}" if image_context.get("hint") else "",
        ]
        return "\n".join([line for line in lines if line])