# abot/plugins/ai_auto_response/context/context_builder.py

from __future__ import annotations

import re
from typing import Dict, List


class ContextBuilder:
    def __init__(self, recent_context_size: int = 30):
        self.recent_context_size = max(int(recent_context_size or 30), 1)

    def build(
        self,
        *,
        room_id: str,
        group_profile: Dict,
        sender: str,
        sender_name: str,
        content: str,
        recent_messages: List[Dict],
        member_context: Dict,
        member_memory_focus: List[str] | None = None,
        trigger: Dict,
        flow_state: str,
        reply_mode: str,
        vector_memories: List[Dict],
        social_memory: Dict | None = None,
        group_facts: Dict | None = None,
        quote_context: Dict | None = None,
        image_context: Dict | None = None,
    ) -> Dict:
        selected_messages = self._select_recent_messages(recent_messages, sender, content, quote_context or {})
        recent_lines = []
        for idx, item in enumerate(selected_messages, start=1):
            msg_sender = item.get("sender_name") or item.get("sender") or "未知成员"
            msg_content = item.get("content") or item.get("message") or ""
            if msg_content:
                # 这里把“发言人”和“正文”拆开保存，避免后续模型把昵称词汇误当成讨论主题。
                recent_lines.append(
                    self._format_recent_message_line(
                        idx=idx,
                        sender_name=str(msg_sender),
                        content=str(msg_content),
                        is_at=bool(item.get("is_at")),
                    )
                )
        return {
            "group_profile": group_profile or {"room_id": room_id},
            "speaker_profile": {
                "wxid": sender,
                "display_name": sender_name,
                "member_context": member_context or {},
            },
            "speaker_name_clean": self._clean_display_name(sender_name),
            "is_at": bool(trigger.get("is_at", False)),
            "is_directed": bool(trigger.get("is_directed", False)),
            "recent_message_items": self._build_recent_message_items(selected_messages),
            "recent_messages": recent_lines,
            "recent_summary": "",
            "trigger_type": trigger.get("trigger_type", "none"),
            "reply_mode": reply_mode,
            "flow_state": flow_state,
            "member_profile_brief_prompt": self._build_member_profile_brief_prompt(member_context or {}),
            "memory_prompt": self._build_member_memory_prompt(member_context, member_memory_focus or []),
            "at_member_profile_prompt": self._build_at_member_profile_prompt(
                member_context=member_context or {},
                focus_lines=member_memory_focus or [],
                is_at=bool(trigger.get("is_at", False)),
                is_directed=bool(trigger.get("is_directed", False)),
            ),
            "vector_memory_prompt": self._build_vector_memory_prompt(vector_memories),
            "social_memory_prompt": self._build_social_memory_prompt(social_memory or {}),
            "group_facts_prompt": self._build_group_facts_prompt(group_facts or {}),
            "group_long_memory_prompt": self._build_group_long_memory_prompt(group_profile or {}),
            "group_profile_prompt": self._build_group_profile_prompt(group_profile or {}),
            "quote_prompt": self._build_quote_prompt(quote_context or {}),
            "image_prompt": self._build_image_prompt(image_context or {}),
            "image_safety_prompt": self._build_image_safety_prompt(
                (quote_context or {}).get("image_safety") or {}
            ),
            "current_message": self._format_current_message_block(sender_name, content),
        }

    @staticmethod
    def _build_recent_message_items(messages: List[Dict]) -> List[Dict]:
        items: List[Dict] = []
        for idx, item in enumerate(messages, start=1):
            content = str(item.get("content") or item.get("message") or "").strip()
            if not content:
                continue
            items.append({
                "idx": idx,
                "sender": item.get("sender_name") or item.get("sender") or "未知成员",
                "content": content[:120],
                "is_at": bool(item.get("is_at")),
            })
        return items

    @staticmethod
    def _sanitize_inline_field(value: str, max_chars: int = 120) -> str:
        # 统一把换行和分隔符清掉，避免后续在单行结构化文本里把字段边界冲散。
        text = re.sub(r"\s+", " ", str(value or "")).strip()
        text = text.replace("|", "／")
        if len(text) > max_chars:
            return text[: max_chars - 3].rstrip() + "..."
        return text

    @classmethod
    def _format_recent_message_line(cls, idx: int, sender_name: str, content: str, is_at: bool = False) -> str:
        sender = cls._sanitize_inline_field(sender_name, max_chars=24) or "未知成员"
        body = cls._sanitize_inline_field(content, max_chars=120)
        parts = [f"[{idx:02d}]", f"发言人={sender}", f"正文={body}"]
        if is_at:
            parts.append("@bot=Y")
        return " | ".join(parts)

    @classmethod
    def _format_current_message_block(cls, sender_name: str, content: str) -> str:
        # 当前消息改成“元信息 + 正文”两段式，方便模型只把正文视为话题语义来源。
        sender = cls._sanitize_inline_field(sender_name, max_chars=24) or "未知成员"
        body = cls._sanitize_inline_field(content, max_chars=500)
        return f"发言人={sender}\n正文={body}"

    def _select_recent_messages(
        self,
        recent_messages: List[Dict],
        current_sender: str,
        current_content: str,
        quote_context: Dict,
    ) -> List[Dict]:
        if not recent_messages:
            return []
        # 这里直接把“最近 N 条”原样交给后续提示词层，而不是再做一次相关性裁剪：
        # 1. 用户明确要求给模型 30 条最近消息，方便推断群里正在讨论的上下文；
        # 2. 之前的“相关性筛选 + 尾部保留”虽然更省 token，但会打断对话连续性；
        # 3. 对群聊场景来说，连续现场通常比少量高分片段更有利于模型判断谁在接谁的话。
        #
        # 这里仍保留签名参数不动，是为了兼容上层调用，避免后续改动牵连太多。
        window = recent_messages[-self.recent_context_size:]
        return window

    @classmethod
    def _message_relevance(
        cls,
        item: Dict,
        *,
        current_sender: str,
        focus_tokens: set[str],
        quote_sender_name: str,
    ) -> int:
        content = str(item.get("content") or item.get("message") or "").strip()
        if not content:
            return 0
        sender = str(item.get("sender", "") or "")
        sender_name = str(item.get("sender_name", "") or "").strip().lower()
        score = 0

        if sender == current_sender:
            score += 3
        if quote_sender_name and quote_sender_name in sender_name:
            score += 3
        if item.get("is_at"):
            score += 1

        if focus_tokens:
            tokens = cls._extract_topic_tokens(content)
            overlap = focus_tokens & tokens
            score += min(len(overlap) * 2, 6)
            if overlap and cls._looks_like_question_or_answer(content):
                score += 2
        elif sender == current_sender:
            score += 1

        if cls._looks_like_question_or_answer(content):
            score += 1
        return score

    @staticmethod
    def _looks_like_question_or_answer(content: str) -> bool:
        text = str(content or "").strip().lower()
        if not text:
            return False
        patterns = [
            r"\?$", r"？$", r"怎么", r"如何", r"为啥", r"为什么", r"能不能", r"可以吗",
            r"报错", r"试试", r"先", r"然后", r"配置", r"日志", r"接口", r"原因",
        ]
        return any(re.search(pattern, text, flags=re.IGNORECASE) for pattern in patterns)

    @staticmethod
    def _extract_topic_tokens(content: str) -> set[str]:
        text = str(content or "").lower()
        tokens = set(re.findall(r"[a-z0-9_\\-]{3,}", text))
        keywords = [
            "openclaw", "qdrant", "ollama", "docker", "python", "api", "插件", "机器人", "模型",
            "日志", "配置", "报错", "部署", "联网", "图片", "记忆", "群聊", "dota", "战绩",
        ]
        for keyword in keywords:
            if keyword in text:
                tokens.add(keyword)
        return tokens

    @staticmethod
    def _clean_display_name(sender_name: str) -> str:
        import re

        text = str(sender_name or "").strip()
        if not text:
            return ""
        text = re.sub(r"\s+", "", text)
        text = re.sub(r"[^\u4e00-\u9fffA-Za-z0-9_]", "", text)
        return text[:8]

    @staticmethod
    def _build_member_profile_brief_prompt(member_context: Dict) -> str:
        # 这份摘要是“常驻给模型看的轻画像”：
        # 1. 不要求当前一定是 @ 或强定向，因为用户希望每次回答都能带上对这个人的基本认识；
        # 2. 这里只保留少量稳定信息，避免画像太重把当前问题压住；
        # 3. 更细的成员记忆、近期相关记忆，仍走后面的按需增强链路。
        if not member_context:
            return ""
        meta = member_context.get("meta", {}) or {}
        summary = str(member_context.get("summary_text", "") or "").strip()
        interaction_style = str(member_context.get("interaction_style", "") or "").strip()
        response_hint = str(member_context.get("response_style_hint", "") or "").strip()
        topics = ContextBuilder._stringify_items(member_context.get("topics_of_interest", []) or [], 3)
        recent_focus = ContextBuilder._stringify_items(member_context.get("recent_focus", []) or [], 2)
        skills = ContextBuilder._stringify_items(meta.get("skill_profile", []) or [], 2)
        reply_prefs = ContextBuilder._stringify_items(meta.get("long_term_reply_preferences", []) or [], 2)
        lines = [
            "当前发言人轻画像：",
            f"成员摘要：{summary}" if summary else "",
            f"互动风格：{interaction_style}" if interaction_style else "",
            f"偏好回复方式：{response_hint}" if response_hint else "",
            f"长期兴趣：{topics}" if topics else "",
            f"近期关注：{recent_focus}" if recent_focus else "",
            f"技能侧重点：{skills}" if skills else "",
            f"回复偏好：{reply_prefs}" if reply_prefs else "",
            "这些信息只用于帮助理解提问方式和回答切口，不要像在背档案。",
        ]
        return "\n".join([line for line in lines if line])

    @staticmethod
    def _build_member_memory_prompt(member_context: Dict, focus_lines: List[str] | None = None) -> str:
        if not member_context:
            return "暂无稳定成员画像。"
        meta = member_context.get("meta", {}) or {}
        topics = member_context.get("topics_of_interest", []) or []
        recent_focus = member_context.get("recent_focus", []) or []
        common_scenarios = ContextBuilder._stringify_items(meta.get("common_scenarios", []), 4)
        skills = ContextBuilder._stringify_items(meta.get("skill_profile", []), 5)
        problem_solving = ContextBuilder._stringify_items(meta.get("problem_solving_profile", []), 4)
        stable_traits = ContextBuilder._stringify_items(meta.get("stable_traits", []), 4)
        habits = ContextBuilder._stringify_items(meta.get("habit_patterns", []), 4)
        expression_profile = ContextBuilder._stringify_items(meta.get("expression_profile", []), 4)
        reply_entry = ContextBuilder._stringify_items(meta.get("reply_entry_profile", []), 4)
        reply_prefs = ContextBuilder._stringify_items(meta.get("long_term_reply_preferences", []), 4)
        recent_state = ContextBuilder._stringify_items(meta.get("recent_state", []), 4)
        reply_taboos = ContextBuilder._stringify_items(meta.get("reply_taboos", []), 3)
        lines = [
            f"成员摘要：{member_context.get('summary_text', '')}".strip(),
            f"互动风格：{member_context.get('interaction_style', '')}".strip(),
            f"回复偏好：{member_context.get('response_style_hint', '')}".strip(),
            f"本次相关记忆：{'；'.join((focus_lines or [])[:4])}" if focus_lines else "",
            f"长期主题：{', '.join(topics[:5])}" if topics else "",
            f"近期关注：{', '.join(recent_focus[:4])}" if recent_focus else "",
            f"常见发言场景：{common_scenarios}" if common_scenarios else "",
            f"技能侧重点：{skills}" if skills else "",
            f"处理问题方式：{problem_solving}" if problem_solving else "",
            f"稳定特征：{stable_traits}" if stable_traits else "",
            f"习惯模式：{habits}" if habits else "",
            f"表达标记：{expression_profile}" if expression_profile else "",
            f"有效接话点：{reply_entry}" if reply_entry else "",
            f"长期回复偏好：{reply_prefs}" if reply_prefs else "",
            f"近期状态：{recent_state}" if recent_state else "",
            f"气质倾向：{meta.get('temperament_tendency', '')}".strip(),
            f"群内角色：{meta.get('group_role', '')}".strip(),
            f"回复禁忌：{reply_taboos}" if reply_taboos else "",
        ]
        return "\n".join([line for line in lines if line])

    @staticmethod
    def _build_at_member_profile_prompt(
        member_context: Dict,
        focus_lines: List[str] | None = None,
        is_at: bool = False,
        is_directed: bool = False,
    ) -> str:
        # 只有明确 @ 或强定向时才给“高优先级成员画像”，避免平时过度套人设
        if not (is_at or is_directed):
            return ""
        if not member_context:
            return "本次是对方点名发起，但暂无稳定画像，按自然群友口吻短回复。"

        meta = member_context.get("meta", {}) or {}
        summary = str(member_context.get("summary_text", "") or "").strip()
        interaction_style = str(member_context.get("interaction_style", "") or "").strip()
        response_hint = str(member_context.get("response_style_hint", "") or "").strip()
        topics = ContextBuilder._stringify_items(member_context.get("topics_of_interest", []) or [], 4)
        focus = "；".join((focus_lines or [])[:3]).strip()
        lines = [
            "本次为点名互动，优先参考该成员画像后再回复：",
            f"成员摘要：{summary}" if summary else "",
            f"互动风格：{interaction_style}" if interaction_style else "",
            f"偏好回复方式：{response_hint}" if response_hint else "",
            f"近期相关记忆：{focus}" if focus else "",
            f"长期兴趣：{topics}" if topics else "",
            f"禁忌提醒：{ContextBuilder._stringify_items(meta.get('reply_taboos', []), 3)}"
            if meta.get("reply_taboos")
            else "",
            "语气要像熟悉的群友，短句、自然，不要客服腔。",
        ]
        return "\n".join([line for line in lines if line])

    @staticmethod
    def _stringify_items(items: List | str, limit: int) -> str:
        if isinstance(items, str):
            return items.strip()
        values: List[str] = []
        for item in items[:limit]:
            if isinstance(item, dict):
                value = str(
                    item.get("name")
                    or item.get("label")
                    or item.get("value")
                    or item.get("text")
                    or ""
                ).strip()
            else:
                value = str(item or "").strip()
            if value and value not in values:
                values.append(value)
        return ", ".join(values)

    @staticmethod
    def _build_vector_memory_prompt(vector_memories: List[Dict]) -> str:
        if not vector_memories:
            return ""
        lines = []
        for item in vector_memories[:2]:
            summary = item.get("content_summary") or item.get("summary_text") or item.get("text") or ""
            memory_type = item.get("memory_type", "memory")
            if summary:
                lines.append(f"[{memory_type}] {summary}")
        return "\n".join(lines)

    @staticmethod
    def _build_social_memory_prompt(social_memory: Dict) -> str:
        prompt = str((social_memory or {}).get("prompt", "") or "").strip()
        return prompt

    @staticmethod
    def _build_group_facts_prompt(group_facts: Dict) -> str:
        return str((group_facts or {}).get("prompt", "") or "").strip()

    @staticmethod
    def _build_group_long_memory_prompt(group_profile: Dict) -> str:
        # 这份摘要是“群长期背景常驻层”：
        # 1. 每次都给一小段，帮助模型知道这个群长期在聊什么、什么风格更合适；
        # 2. 不把完整群画像整段塞进去，避免大量通用风格描述把 token 吃满；
        # 3. 更细的群事实、群关系仍走相关性增强链路。
        if not group_profile:
            return ""
        structured = group_profile.get("group_memory_structured", {}) or {}
        summary = ContextBuilder._compact_group_summary(str(group_profile.get("group_memory_summary", "") or ""), max_chars=220, max_sentences=4)
        focus = ", ".join(group_profile.get("knowledge_focus", [])[:4])
        memory_style = ContextBuilder._build_style_summary(group_profile.get("group_memory_style", {}))
        stable_topics = ContextBuilder._stringify_items(structured.get("stable_topics", []) or [], 4)
        recent_points = ContextBuilder._stringify_items(structured.get("recent_key_points", []) or [], 3)
        unresolved_points = ContextBuilder._stringify_items(structured.get("unresolved_points", []) or [], 3)
        resource_clues = ContextBuilder._stringify_items(structured.get("resource_clues", []) or [], 3)
        role_hints = ContextBuilder._stringify_items(structured.get("role_hints", []) or [], 3)
        summary_days = int(group_profile.get("group_memory_summary_days", 0) or 0)
        lines = [
            "群长期背景：",
            f"摘要观察窗口：最近 {summary_days} 份群总结" if summary_days > 0 else "",
            f"稳定主题：{stable_topics}" if stable_topics else "",
            f"近期重点：{recent_points}" if recent_points else "",
            f"未决问题：{unresolved_points}" if unresolved_points else "",
            f"共享资源/线索：{resource_clues}" if resource_clues else "",
            f"角色线索：{role_hints}" if role_hints else "",
            f"长期摘要：{summary}" if summary else "",
            f"常聊方向：{focus}" if focus else "",
            f"历史社交风格：{memory_style}" if memory_style else "",
        ]
        return "\n".join([line for line in lines if line])

    @staticmethod
    def _build_group_profile_prompt(group_profile: Dict) -> str:
        if not group_profile:
            return "当前群没有特殊知识域限制。"
        structured = group_profile.get("group_memory_structured", {}) or {}
        focus = ", ".join(group_profile.get("knowledge_focus", [])[:6])
        boundaries = ", ".join(group_profile.get("topic_boundaries", [])[:6])
        summary = ContextBuilder._compact_group_summary(str(group_profile.get("group_memory_summary", "") or ""))
        stable_topics = ContextBuilder._stringify_items(structured.get("stable_topics", []) or [], 4)
        recent_points = ContextBuilder._stringify_items(structured.get("recent_key_points", []) or [], 3)
        unresolved_points = ContextBuilder._stringify_items(structured.get("unresolved_points", []) or [], 3)
        resource_clues = ContextBuilder._stringify_items(structured.get("resource_clues", []) or [], 3)
        role_hints = ContextBuilder._stringify_items(structured.get("role_hints", []) or [], 3)
        lines = [
            f"群模式：{group_profile.get('mode', 'social')}",
            f"知识域偏向：{group_profile.get('knowledge_domain', 'general')}（仅作理解倾向，不是每次都要显式提到）",
            f"配置知识域：{group_profile.get('configured_domain', 'general')}（仅在当前话题相关时参考）",
            f"历史推断知识域：{group_profile.get('group_memory_domain', 'general')}（弱参考）",
            f"回答风格：{group_profile.get('reply_style', '自然短句')}",
            f"互动调性：{group_profile.get('interaction_tone', '自然群友感')}",
            f"幽默强度：{group_profile.get('humor_style', '轻微')}",
            f"嘴硬程度：{group_profile.get('sharpness_style', '轻微嘴硬，不刻薄')}",
            f"表达松弛度：{group_profile.get('expressiveness_style', '克制')}",
            f"称呼强度：{group_profile.get('address_style', '低频称呼，默认直接接话')}",
            f"可能相关的话题背景：{focus}" if focus else "",
            # 这里显式把群摘要结构字段展开给模型：
            # 1. LLM 更擅长消费清晰字段，而不是再从 markdown 文案里二次猜测；
            # 2. “稳定主题/近期重点/未决问题”分别承载不同决策用途，混成一段反而不好用；
            # 3. 仍然保留原摘要关键句，作为字段缺失时的人类可读兜底。
            f"群摘要稳定主题：{stable_topics}" if stable_topics else "",
            f"群摘要近期重点：{recent_points}" if recent_points else "",
            f"群摘要未决问题：{unresolved_points}" if unresolved_points else "",
            f"群摘要资源线索：{resource_clues}" if resource_clues else "",
            f"群摘要角色线索：{role_hints}" if role_hints else "",
            f"群长期摘要关键句：{summary}" if summary else "",
            f"历史推断社交风格：{ContextBuilder._build_style_summary(group_profile.get('group_memory_style', {}))}"
            if group_profile.get("group_memory_style")
            else "",
            f"边界提醒：{boundaries}" if boundaries else "",
            f"人格叠加：{group_profile.get('persona_overlay', '')}".strip(),
        ]
        return "\n".join([line for line in lines if line])

    @staticmethod
    def _build_style_summary(style_profile: Dict) -> str:
        if not style_profile:
            return ""
        return " / ".join(
            [
                str(style_profile.get("interaction_tone", "") or "").strip(),
                str(style_profile.get("humor_style", "") or "").strip(),
                str(style_profile.get("sharpness_style", "") or "").strip(),
                str(style_profile.get("expressiveness_style", "") or "").strip(),
            ]
        ).strip(" /")

    @staticmethod
    def _compact_group_summary(summary_text: str, max_chars: int = 420, max_sentences: int = 6) -> str:
        text = str(summary_text or "").strip()
        if not text:
            return ""
        text = re.sub(r"\s+", " ", text.replace("\n", " ").replace("\r", " ")).strip()
        if len(text) <= max_chars:
            return text

        # 长摘要按句提炼，优先保留“结论/风险/动作/配置”等关键词句，避免简单截断丢重点。
        sentences = [part.strip(" ，,；;。.!?！？:：") for part in re.split(r"[。！？!?；;\n]+", text) if part.strip()]
        if not sentences:
            return text[: max_chars - 3] + "..."

        key_patterns = [
            r"结论|核心|重点|关键|建议|方案|步骤|原因|影响|风险|注意|问题|异常|报错|故障|超时|阻塞",
            r"配置|参数|阈值|策略|限制|回退|优化|修复|排查|上线|回滚|依赖|版本|兼容",
        ]
        scored: List[tuple[int, int, str]] = []
        for idx, sentence in enumerate(sentences):
            score = 0
            for pattern in key_patterns:
                if re.search(pattern, sentence, flags=re.IGNORECASE):
                    score += 3
            if re.search(r"\d", sentence):
                score += 1
            if 8 <= len(sentence) <= 80:
                score += 1
            if idx == 0 or idx == len(sentences) - 1:
                score += 1
            scored.append((score, idx, sentence))

        chosen_indexes = {0, len(sentences) - 1}
        for _, idx, _ in sorted(scored, key=lambda x: (-x[0], x[1])):
            chosen_indexes.add(idx)
            if len(chosen_indexes) >= max_sentences:
                break

        chosen = [sentences[idx] for idx in sorted(chosen_indexes)]
        merged = "；".join([item for item in chosen if item]).strip("；")
        if len(merged) <= max_chars:
            return merged
        return merged[: max_chars - 3].rstrip(" ，,；;。.!?！？:：") + "..."

    @staticmethod
    def _build_quote_prompt(quote_context: Dict) -> str:
        if not quote_context:
            return ""
        quote_type = quote_context.get("quote_type_label", "引用消息")
        quote_sender = (quote_context.get("quote_sender_name", "") or "").strip()
        quote_body = quote_context.get("quote_body", "") or ""
        title = quote_context.get("title", "") or ""
        lines = [
            f"用户这次是在引用消息后发言。",
            f"引用类型：{quote_type}",
            f"被引用发送者：{quote_sender}" if quote_sender and quote_sender != "未知成员" else "",
            f"图片附件：已附带原图" if quote_context.get("has_image_attachment") else "",
            f"引用标题：{title}" if title else "",
            f"被引用内容：{quote_body}" if quote_body else "",
        ]
        payload = [line for line in lines if line]
        # 兜底：如果最终只剩“引用类型”，没有可用内容，就不输出引用补充
        if len(payload) <= 2 and not quote_body and not title:
            return ""
        return "\n".join(payload)

    @staticmethod
    def _build_image_prompt(image_context: Dict) -> str:
        if not image_context:
            return ""
        lines = [
            "已附带最近一张群图片作为上下文。",
            f"图片发送者：{image_context.get('sender_name', '未知成员')}",
            f"图片说明：{image_context.get('hint', '')}" if image_context.get("hint") else "",
        ]
        return "\n".join([line for line in lines if line])

    @staticmethod
    def _build_image_safety_prompt(image_safety: Dict) -> str:
        if not image_safety or not image_safety.get("suspected"):
            return ""
        if image_safety.get("has_visual_context"):
            return "当前发言疑似是在评论图片，但本次已附带图片上下文，可以基于图片谨慎理解。"
        reason = str(image_safety.get("reason", "") or "").strip()
        lines = [
            "当前发言疑似是在评论图片，但你这次没有看到图片本身。",
            f"原因：{reason}" if reason else "",
            "不要假装看过图，不要直接评价画面细节、人物状态、构图、文字内容或颜色元素。",
            "如果要回，只能轻微承认信息不足，或请对方引用图片/补一句文字说明，再继续。",
        ]
        return "\n".join([line for line in lines if line])