# abot/plugins/ai_auto_response/context/context_builder.py

from __future__ import annotations

import re
from typing import Dict, List


class ContextBuilder:
    def __init__(self, recent_context_size: int = 30):
        self.recent_context_size = max(int(recent_context_size or 30), 1)

    def build(
        self,
        *,
        room_id: str,
        group_profile: Dict,
        sender: str,
        sender_name: str,
        content: str,
        recent_messages: List[Dict],
        member_context: Dict,
        member_memory_focus: List[str] | None = None,
        trigger: Dict,
        flow_state: str,
        reply_mode: str,
        vector_memories: List[Dict],
        social_memory: Dict | None = None,
        group_facts: Dict | None = None,
        quote_context: Dict | None = None,
        image_context: Dict | None = None,
    ) -> Dict:
        selected_messages = self._select_recent_messages(recent_messages, sender, content, quote_context or {})
        recent_lines = []
        for item in selected_messages:
            msg_sender = item.get("sender_name") or item.get("sender") or "未知成员"
            msg_content = item.get("content") or item.get("message") or ""
            if msg_content:
                recent_lines.append(f"{msg_sender}: {msg_content}")
        return {
            "group_profile": group_profile or {"room_id": room_id},
            "speaker_profile": {
                "wxid": sender,
                "display_name": sender_name,
                "member_context": member_context or {},
            },
            "speaker_name_clean": self._clean_display_name(sender_name),
            "is_at": bool(trigger.get("is_at", False)),
            "is_directed": bool(trigger.get("is_directed", False)),
            "recent_message_items": self._build_recent_message_items(selected_messages),
            "recent_messages": recent_lines,
            "recent_summary": "",
            "trigger_type": trigger.get("trigger_type", "none"),
            "reply_mode": reply_mode,
            "flow_state": flow_state,
            "memory_prompt": self._build_member_memory_prompt(member_context, member_memory_focus or []),
            "at_member_profile_prompt": self._build_at_member_profile_prompt(
                member_context=member_context or {},
                focus_lines=member_memory_focus or [],
                is_at=bool(trigger.get("is_at", False)),
                is_directed=bool(trigger.get("is_directed", False)),
            ),
            "vector_memory_prompt": self._build_vector_memory_prompt(vector_memories),
            "social_memory_prompt": self._build_social_memory_prompt(social_memory or {}),
            "group_facts_prompt": self._build_group_facts_prompt(group_facts or {}),
            "group_profile_prompt": self._build_group_profile_prompt(group_profile or {}),
            "quote_prompt": self._build_quote_prompt(quote_context or {}),
            "image_prompt": self._build_image_prompt(image_context or {}),
            "image_safety_prompt": self._build_image_safety_prompt(
                (quote_context or {}).get("image_safety") or {}
            ),
            "current_message": f"{sender_name}: {content}",
        }

    @staticmethod
    def _build_recent_message_items(messages: List[Dict]) -> List[Dict]:
        items: List[Dict] = []
        for idx, item in enumerate(messages, start=1):
            content = str(item.get("content") or item.get("message") or "").strip()
            if not content:
                continue
            items.append({
                "idx": idx,
                "sender": item.get("sender_name") or item.get("sender") or "未知成员",
                "content": content[:120],
                "is_at": bool(item.get("is_at")),
            })
        return items

    def _select_recent_messages(
        self,
        recent_messages: List[Dict],
        current_sender: str,
        current_content: str,
        quote_context: Dict,
    ) -> List[Dict]:
        if not recent_messages:
            return []
        window = recent_messages[-self.recent_context_size:]
        if len(window) <= 8:
            return window

        current_tokens = self._extract_topic_tokens(current_content)
        quote_tokens = self._extract_topic_tokens(
            f"{quote_context.get('title', '')} {quote_context.get('quote_body', '')}"
        )
        focus_tokens = current_tokens | quote_tokens
        quote_sender_name = str(quote_context.get("quote_sender_name", "") or "").strip().lower()

        scored: List[tuple[int, int, Dict]] = []
        for idx, item in enumerate(window):
            score = self._message_relevance(
                item,
                current_sender=current_sender,
                focus_tokens=focus_tokens,
                quote_sender_name=quote_sender_name,
            )
            if score > 0:
                scored.append((score, idx, item))

        # 总是保留尾部几条，维持现场感；再拼上与当前话题最相关的消息。
        tail_indexes = set(range(max(len(window) - 4, 0), len(window)))
        keep_indexes = set(tail_indexes)
        for _, idx, _ in sorted(scored, key=lambda x: (-x[0], -x[1]))[:10]:
            keep_indexes.add(idx)

        selected = [window[idx] for idx in sorted(keep_indexes)]
        if len(selected) < 6:
            return window[-6:]
        return selected[-12:]

    @classmethod
    def _message_relevance(
        cls,
        item: Dict,
        *,
        current_sender: str,
        focus_tokens: set[str],
        quote_sender_name: str,
    ) -> int:
        content = str(item.get("content") or item.get("message") or "").strip()
        if not content:
            return 0
        sender = str(item.get("sender", "") or "")
        sender_name = str(item.get("sender_name", "") or "").strip().lower()
        score = 0

        if sender == current_sender:
            score += 3
        if quote_sender_name and quote_sender_name in sender_name:
            score += 3
        if item.get("is_at"):
            score += 1

        if focus_tokens:
            tokens = cls._extract_topic_tokens(content)
            overlap = focus_tokens & tokens
            score += min(len(overlap) * 2, 6)
            if overlap and cls._looks_like_question_or_answer(content):
                score += 2
        elif sender == current_sender:
            score += 1

        if cls._looks_like_question_or_answer(content):
            score += 1
        return score

    @staticmethod
    def _looks_like_question_or_answer(content: str) -> bool:
        text = str(content or "").strip().lower()
        if not text:
            return False
        patterns = [
            r"\?$", r"？$", r"怎么", r"如何", r"为啥", r"为什么", r"能不能", r"可以吗",
            r"报错", r"试试", r"先", r"然后", r"配置", r"日志", r"接口", r"原因",
        ]
        return any(re.search(pattern, text, flags=re.IGNORECASE) for pattern in patterns)

    @staticmethod
    def _extract_topic_tokens(content: str) -> set[str]:
        text = str(content or "").lower()
        tokens = set(re.findall(r"[a-z0-9_\\-]{3,}", text))
        keywords = [
            "openclaw", "qdrant", "ollama", "docker", "python", "api", "插件", "机器人", "模型",
            "日志", "配置", "报错", "部署", "联网", "图片", "记忆", "群聊", "dota", "战绩",
        ]
        for keyword in keywords:
            if keyword in text:
                tokens.add(keyword)
        return tokens

    @staticmethod
    def _clean_display_name(sender_name: str) -> str:
        import re

        text = str(sender_name or "").strip()
        if not text:
            return ""
        text = re.sub(r"\s+", "", text)
        text = re.sub(r"[^\u4e00-\u9fffA-Za-z0-9_]", "", text)
        return text[:8]

    @staticmethod
    def _build_member_memory_prompt(member_context: Dict, focus_lines: List[str] | None = None) -> str:
        if not member_context:
            return "暂无稳定成员画像。"
        meta = member_context.get("meta", {}) or {}
        topics = member_context.get("topics_of_interest", []) or []
        recent_focus = member_context.get("recent_focus", []) or []
        common_scenarios = ContextBuilder._stringify_items(meta.get("common_scenarios", []), 4)
        skills = ContextBuilder._stringify_items(meta.get("skill_profile", []), 5)
        problem_solving = ContextBuilder._stringify_items(meta.get("problem_solving_profile", []), 4)
        stable_traits = ContextBuilder._stringify_items(meta.get("stable_traits", []), 4)
        habits = ContextBuilder._stringify_items(meta.get("habit_patterns", []), 4)
        expression_profile = ContextBuilder._stringify_items(meta.get("expression_profile", []), 4)
        reply_entry = ContextBuilder._stringify_items(meta.get("reply_entry_profile", []), 4)
        reply_prefs = ContextBuilder._stringify_items(meta.get("long_term_reply_preferences", []), 4)
        recent_state = ContextBuilder._stringify_items(meta.get("recent_state", []), 4)
        reply_taboos = ContextBuilder._stringify_items(meta.get("reply_taboos", []), 3)
        lines = [
            f"成员摘要：{member_context.get('summary_text', '')}".strip(),
            f"互动风格：{member_context.get('interaction_style', '')}".strip(),
            f"回复偏好：{member_context.get('response_style_hint', '')}".strip(),
            f"本次相关记忆：{'；'.join((focus_lines or [])[:4])}" if focus_lines else "",
            f"长期主题：{', '.join(topics[:5])}" if topics else "",
            f"近期关注：{', '.join(recent_focus[:4])}" if recent_focus else "",
            f"常见发言场景：{common_scenarios}" if common_scenarios else "",
            f"技能侧重点：{skills}" if skills else "",
            f"处理问题方式：{problem_solving}" if problem_solving else "",
            f"稳定特征：{stable_traits}" if stable_traits else "",
            f"习惯模式：{habits}" if habits else "",
            f"表达标记：{expression_profile}" if expression_profile else "",
            f"有效接话点：{reply_entry}" if reply_entry else "",
            f"长期回复偏好：{reply_prefs}" if reply_prefs else "",
            f"近期状态：{recent_state}" if recent_state else "",
            f"气质倾向：{meta.get('temperament_tendency', '')}".strip(),
            f"群内角色：{meta.get('group_role', '')}".strip(),
            f"回复禁忌：{reply_taboos}" if reply_taboos else "",
        ]
        return "\n".join([line for line in lines if line])

    @staticmethod
    def _build_at_member_profile_prompt(
        member_context: Dict,
        focus_lines: List[str] | None = None,
        is_at: bool = False,
        is_directed: bool = False,
    ) -> str:
        # 只有明确 @ 或强定向时才给“高优先级成员画像”，避免平时过度套人设
        if not (is_at or is_directed):
            return ""
        if not member_context:
            return "本次是对方点名发起，但暂无稳定画像，按自然群友口吻短回复。"

        meta = member_context.get("meta", {}) or {}
        summary = str(member_context.get("summary_text", "") or "").strip()
        interaction_style = str(member_context.get("interaction_style", "") or "").strip()
        response_hint = str(member_context.get("response_style_hint", "") or "").strip()
        topics = ContextBuilder._stringify_items(member_context.get("topics_of_interest", []) or [], 4)
        focus = "；".join((focus_lines or [])[:3]).strip()
        lines = [
            "本次为点名互动，优先参考该成员画像后再回复：",
            f"成员摘要：{summary}" if summary else "",
            f"互动风格：{interaction_style}" if interaction_style else "",
            f"偏好回复方式：{response_hint}" if response_hint else "",
            f"近期相关记忆：{focus}" if focus else "",
            f"长期兴趣：{topics}" if topics else "",
            f"禁忌提醒：{ContextBuilder._stringify_items(meta.get('reply_taboos', []), 3)}"
            if meta.get("reply_taboos")
            else "",
            "语气要像熟悉的群友，短句、自然，不要客服腔。",
        ]
        return "\n".join([line for line in lines if line])

    @staticmethod
    def _stringify_items(items: List | str, limit: int) -> str:
        if isinstance(items, str):
            return items.strip()
        values: List[str] = []
        for item in items[:limit]:
            if isinstance(item, dict):
                value = str(
                    item.get("name")
                    or item.get("label")
                    or item.get("value")
                    or item.get("text")
                    or ""
                ).strip()
            else:
                value = str(item or "").strip()
            if value and value not in values:
                values.append(value)
        return ", ".join(values)

    @staticmethod
    def _build_vector_memory_prompt(vector_memories: List[Dict]) -> str:
        if not vector_memories:
            return ""
        lines = []
        for item in vector_memories[:2]:
            summary = item.get("content_summary") or item.get("summary_text") or item.get("text") or ""
            memory_type = item.get("memory_type", "memory")
            if summary:
                lines.append(f"[{memory_type}] {summary}")
        return "\n".join(lines)

    @staticmethod
    def _build_social_memory_prompt(social_memory: Dict) -> str:
        prompt = str((social_memory or {}).get("prompt", "") or "").strip()
        return prompt

    @staticmethod
    def _build_group_facts_prompt(group_facts: Dict) -> str:
        return str((group_facts or {}).get("prompt", "") or "").strip()

    @staticmethod
    def _build_group_profile_prompt(group_profile: Dict) -> str:
        if not group_profile:
            return "当前群没有特殊知识域限制。"
        focus = ", ".join(group_profile.get("knowledge_focus", [])[:6])
        boundaries = ", ".join(group_profile.get("topic_boundaries", [])[:6])
        summary = ContextBuilder._compact_group_summary(str(group_profile.get("group_memory_summary", "") or ""))
        lines = [
            f"群模式：{group_profile.get('mode', 'social')}",
            f"知识域偏向：{group_profile.get('knowledge_domain', 'general')}（仅作理解倾向，不是每次都要显式提到）",
            f"配置知识域：{group_profile.get('configured_domain', 'general')}（仅在当前话题相关时参考）",
            f"历史推断知识域：{group_profile.get('group_memory_domain', 'general')}（弱参考）",
            f"回答风格：{group_profile.get('reply_style', '自然短句')}",
            f"互动调性：{group_profile.get('interaction_tone', '自然群友感')}",
            f"幽默强度：{group_profile.get('humor_style', '轻微')}",
            f"嘴硬程度：{group_profile.get('sharpness_style', '轻微嘴硬，不刻薄')}",
            f"表达松弛度：{group_profile.get('expressiveness_style', '克制')}",
            f"称呼强度：{group_profile.get('address_style', '低频称呼，默认直接接话')}",
            f"可能相关的话题背景：{focus}" if focus else "",
            f"群长期摘要关键句：{summary}" if summary else "",
            f"历史推断社交风格：{ContextBuilder._build_style_summary(group_profile.get('group_memory_style', {}))}"
            if group_profile.get("group_memory_style")
            else "",
            f"边界提醒：{boundaries}" if boundaries else "",
            f"人格叠加：{group_profile.get('persona_overlay', '')}".strip(),
        ]
        return "\n".join([line for line in lines if line])

    @staticmethod
    def _build_style_summary(style_profile: Dict) -> str:
        if not style_profile:
            return ""
        return " / ".join(
            [
                str(style_profile.get("interaction_tone", "") or "").strip(),
                str(style_profile.get("humor_style", "") or "").strip(),
                str(style_profile.get("sharpness_style", "") or "").strip(),
                str(style_profile.get("expressiveness_style", "") or "").strip(),
            ]
        ).strip(" /")

    @staticmethod
    def _compact_group_summary(summary_text: str, max_chars: int = 420, max_sentences: int = 6) -> str:
        text = str(summary_text or "").strip()
        if not text:
            return ""
        text = re.sub(r"\s+", " ", text.replace("\n", " ").replace("\r", " ")).strip()
        if len(text) <= max_chars:
            return text

        # 长摘要按句提炼，优先保留“结论/风险/动作/配置”等关键词句，避免简单截断丢重点。
        sentences = [part.strip(" ，,；;。.!?！？:：") for part in re.split(r"[。！？!?；;\n]+", text) if part.strip()]
        if not sentences:
            return text[: max_chars - 3] + "..."

        key_patterns = [
            r"结论|核心|重点|关键|建议|方案|步骤|原因|影响|风险|注意|问题|异常|报错|故障|超时|阻塞",
            r"配置|参数|阈值|策略|限制|回退|优化|修复|排查|上线|回滚|依赖|版本|兼容",
        ]
        scored: List[tuple[int, int, str]] = []
        for idx, sentence in enumerate(sentences):
            score = 0
            for pattern in key_patterns:
                if re.search(pattern, sentence, flags=re.IGNORECASE):
                    score += 3
            if re.search(r"\d", sentence):
                score += 1
            if 8 <= len(sentence) <= 80:
                score += 1
            if idx == 0 or idx == len(sentences) - 1:
                score += 1
            scored.append((score, idx, sentence))

        chosen_indexes = {0, len(sentences) - 1}
        for _, idx, _ in sorted(scored, key=lambda x: (-x[0], x[1])):
            chosen_indexes.add(idx)
            if len(chosen_indexes) >= max_sentences:
                break

        chosen = [sentences[idx] for idx in sorted(chosen_indexes)]
        merged = "；".join([item for item in chosen if item]).strip("；")
        if len(merged) <= max_chars:
            return merged
        return merged[: max_chars - 3].rstrip(" ，,；;。.!?！？:：") + "..."

    @staticmethod
    def _build_quote_prompt(quote_context: Dict) -> str:
        if not quote_context:
            return ""
        quote_type = quote_context.get("quote_type_label", "引用消息")
        quote_sender = quote_context.get("quote_sender_name", "") or "未知成员"
        quote_body = quote_context.get("quote_body", "") or ""
        title = quote_context.get("title", "") or ""
        lines = [
            f"用户这次是在引用消息后发言。",
            f"引用类型：{quote_type}",
            f"被引用发送者：{quote_sender}",
            f"图片附件：已附带原图" if quote_context.get("has_image_attachment") else "",
            f"引用标题：{title}" if title else "",
            f"被引用内容：{quote_body}" if quote_body else "",
        ]
        return "\n".join([line for line in lines if line])

    @staticmethod
    def _build_image_prompt(image_context: Dict) -> str:
        if not image_context:
            return ""
        lines = [
            "已附带最近一张群图片作为上下文。",
            f"图片发送者：{image_context.get('sender_name', '未知成员')}",
            f"图片说明：{image_context.get('hint', '')}" if image_context.get("hint") else "",
        ]
        return "\n".join([line for line in lines if line])

    @staticmethod
    def _build_image_safety_prompt(image_safety: Dict) -> str:
        if not image_safety or not image_safety.get("suspected"):
            return ""
        if image_safety.get("has_visual_context"):
            return "当前发言疑似是在评论图片，但本次已附带图片上下文，可以基于图片谨慎理解。"
        reason = str(image_safety.get("reason", "") or "").strip()
        lines = [
            "当前发言疑似是在评论图片，但你这次没有看到图片本身。",
            f"原因：{reason}" if reason else "",
            "不要假装看过图，不要直接评价画面细节、人物状态、构图、文字内容或颜色元素。",
            "如果要回，只能轻微承认信息不足，或请对方引用图片/补一句文字说明，再继续。",
        ]
        return "\n".join([line for line in lines if line])