from __future__ import annotations

import re
from typing import Dict, List


class ContextBuilder:
    """Assemble the structured prompt context for one incoming group message.

    ``build`` is the single public entry point. It returns a plain dict of
    flags, formatted recent-message lines and a set of ``*_prompt`` text
    blocks. Every ``_build_*`` helper is a pure function over the dicts it is
    given and returns ``""`` (or a fixed fallback sentence) when it has
    nothing useful to say.
    """

    def __init__(self, recent_context_size: int = 30):
        """Store the recent-message window size.

        Args:
            recent_context_size: Number of trailing messages to keep. Falsy
                values fall back to 30 and the result is clamped to >= 1.
        """
        self.recent_context_size = max(int(recent_context_size or 30), 1)

    def build(
        self,
        *,
        room_id: str,
        group_profile: Dict,
        sender: str,
        sender_name: str,
        content: str,
        recent_messages: List[Dict],
        member_context: Dict,
        member_memory_focus: List[str] | None = None,
        trigger: Dict,
        flow_state: str,
        reply_mode: str,
        vector_memories: List[Dict],
        social_memory: Dict | None = None,
        group_facts: Dict | None = None,
        quote_context: Dict | None = None,
        image_context: Dict | None = None,
    ) -> Dict:
        """Build the context dict for the current message.

        All arguments are keyword-only. ``None`` is tolerated for every dict
        or list argument and is treated as empty.

        Returns:
            A dict holding the speaker/group profiles, trigger flags, the
            recent-message window (as structured items and as single-line
            strings), the individual prompt blocks and the formatted current
            message.
        """
        group_profile = group_profile or {}
        member_context = member_context or {}
        trigger = trigger or {}
        quote_context = quote_context or {}
        focus_lines = member_memory_focus or []
        is_at = bool(trigger.get("is_at", False))
        is_directed = bool(trigger.get("is_directed", False))

        selected_messages = self._select_recent_messages(
            recent_messages, sender, content, quote_context
        )

        # Speaker and body are kept as separate fields so the model does not
        # mistake words in a nickname for the topic under discussion.
        recent_lines: List[str] = []
        for idx, item in enumerate(selected_messages, start=1):
            body = str(item.get("content") or item.get("message") or "").strip()
            if not body:
                # Same skip rule as _build_recent_message_items, so both views
                # of the window list exactly the same messages.
                continue
            speaker = item.get("sender_name") or item.get("sender") or "未知成员"
            recent_lines.append(
                self._format_recent_message_line(
                    idx=idx,
                    sender_name=str(speaker),
                    content=body,
                    is_at=bool(item.get("is_at")),
                )
            )

        return {
            "group_profile": group_profile or {"room_id": room_id},
            "speaker_profile": {
                "wxid": sender,
                "display_name": sender_name,
                "member_context": member_context,
            },
            "speaker_name_clean": self._clean_display_name(sender_name),
            "is_at": is_at,
            "is_directed": is_directed,
            # Consumed by the downstream prompt-policy layer to decide whether
            # group-level memory may be opened up.
            "is_group_memory_query": bool(trigger.get("is_group_memory_query", False)),
            "recent_message_items": self._build_recent_message_items(selected_messages),
            "recent_messages": recent_lines,
            "recent_summary": "",
            "trigger_type": trigger.get("trigger_type", "none"),
            "reply_mode": reply_mode,
            "flow_state": flow_state,
            "member_profile_brief_prompt": self._build_member_profile_brief_prompt(member_context),
            "memory_prompt": self._build_member_memory_prompt(member_context, focus_lines),
            "at_member_profile_prompt": self._build_at_member_profile_prompt(
                member_context=member_context,
                focus_lines=focus_lines,
                is_at=is_at,
                is_directed=is_directed,
            ),
            "vector_memory_prompt": self._build_vector_memory_prompt(vector_memories),
            "social_memory_prompt": self._build_social_memory_prompt(social_memory or {}),
            "group_facts_prompt": self._build_group_facts_prompt(group_facts or {}),
            "group_long_memory_prompt": self._build_group_long_memory_prompt(group_profile),
            "group_profile_prompt": self._build_group_profile_prompt(group_profile),
            "quote_prompt": self._build_quote_prompt(quote_context),
            "image_prompt": self._build_image_prompt(image_context or {}),
            "image_safety_prompt": self._build_image_safety_prompt(
                quote_context.get("image_safety") or {}
            ),
            "current_message": self._format_current_message_block(sender_name, content),
        }

    # ------------------------------------------------------------------
    # Small shared helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _text(value: object) -> str:
        """Return ``value`` as a stripped string; falsy values become ``""``."""
        return str(value or "").strip()

    @staticmethod
    def _labeled(label: str, value: str) -> str:
        """Return ``"label:value"``, or ``""`` when ``value`` is empty."""
        return f"{label}:{value}" if value else ""

    @staticmethod
    def _join_lines(lines: List[str]) -> str:
        """Join the non-empty entries of ``lines`` with newlines."""
        return "\n".join(line for line in lines if line)

    # ------------------------------------------------------------------
    # Recent-message formatting
    # ------------------------------------------------------------------

    @staticmethod
    def _build_recent_message_items(messages: List[Dict]) -> List[Dict]:
        """Return structured items for every message that has a non-blank body.

        ``idx`` is the 1-based position in ``messages``; skipped messages
        leave gaps. The body is truncated to 120 characters.
        """
        items: List[Dict] = []
        for idx, item in enumerate(messages, start=1):
            content = str(item.get("content") or item.get("message") or "").strip()
            if not content:
                continue
            items.append(
                {
                    "idx": idx,
                    "sender": item.get("sender_name") or item.get("sender") or "未知成员",
                    "content": content[:120],
                    "is_at": bool(item.get("is_at")),
                }
            )
        return items

    @staticmethod
    def _sanitize_inline_field(value: str, max_chars: int = 120) -> str:
        """Flatten ``value`` to one line that is safe inside ``" | "`` records.

        Whitespace runs (including newlines) collapse to a single space and
        ``|`` becomes ``/`` so field boundaries cannot be broken. Text longer
        than ``max_chars`` is cut and suffixed with ``"..."``.
        """
        text = re.sub(r"\s+", " ", str(value or "")).strip()
        text = text.replace("|", "/")
        if len(text) <= max_chars:
            return text
        if max_chars <= 3:
            # No room for the ellipsis; a negative slice bound would otherwise
            # cut from the wrong end of the string.
            return text[: max(max_chars, 0)]
        return text[: max_chars - 3].rstrip() + "..."

    @classmethod
    def _format_recent_message_line(
        cls, idx: int, sender_name: str, content: str, is_at: bool = False
    ) -> str:
        """Format one history message as ``[NN] | 发言人=… | 正文=…[ | @bot=Y]``."""
        sender = cls._sanitize_inline_field(sender_name, max_chars=24) or "未知成员"
        body = cls._sanitize_inline_field(content, max_chars=120)
        parts = [f"[{idx:02d}]", f"发言人={sender}", f"正文={body}"]
        if is_at:
            parts.append("@bot=Y")
        return " | ".join(parts)

    @classmethod
    def _format_current_message_block(cls, sender_name: str, content: str) -> str:
        """Format the current message as a two-line "metadata + body" block.

        Keeping the speaker on its own line lets the model treat only the
        body as the source of topic semantics.
        """
        sender = cls._sanitize_inline_field(sender_name, max_chars=24) or "未知成员"
        body = cls._sanitize_inline_field(content, max_chars=500)
        return f"发言人={sender}\n正文={body}"

    def _select_recent_messages(
        self,
        recent_messages: List[Dict],
        current_sender: str,
        current_content: str,
        quote_context: Dict,
    ) -> List[Dict]:
        """Return the last ``recent_context_size`` messages, unfiltered.

        The window is deliberately passed through as-is rather than being
        trimmed by relevance:
        1. the model is meant to receive the most recent N messages so it can
           infer what the group is currently discussing;
        2. the earlier "relevance filter + keep tail" approach saved tokens
           but broke conversational continuity;
        3. in group chat a continuous transcript usually helps the model work
           out who is replying to whom better than a few high-score snippets.

        ``current_sender``, ``current_content`` and ``quote_context`` are
        unused; they stay in the signature for caller compatibility.
        """
        if not recent_messages:
            return []
        return recent_messages[-self.recent_context_size:]

    # ------------------------------------------------------------------
    # Relevance scoring (not used by _select_recent_messages)
    # ------------------------------------------------------------------

    @classmethod
    def _message_relevance(
        cls,
        item: Dict,
        *,
        current_sender: str,
        focus_tokens: set[str],
        quote_sender_name: str,
    ) -> int:
        """Score how relevant a history message is to the current one.

        Returns 0 for messages without a body. ``quote_sender_name`` is
        compared against the lower-cased sender name as given, so callers
        presumably pass it lower-cased already.
        """
        content = str(item.get("content") or item.get("message") or "").strip()
        if not content:
            return 0

        sender = str(item.get("sender", "") or "")
        sender_name = str(item.get("sender_name", "") or "").strip().lower()
        looks_qa = cls._looks_like_question_or_answer(content)

        score = 0
        if sender == current_sender:
            score += 3
        if quote_sender_name and quote_sender_name in sender_name:
            score += 3
        if item.get("is_at"):
            score += 1

        # NOTE(review): the original layout was lost; the sender bonus below is
        # read as the fallback for "no focus tokens" - confirm against history.
        if focus_tokens:
            overlap = focus_tokens & cls._extract_topic_tokens(content)
            score += min(len(overlap) * 2, 6)
            if overlap and looks_qa:
                score += 2
        elif sender == current_sender:
            score += 1

        if looks_qa:
            score += 1
        return score

    @staticmethod
    def _looks_like_question_or_answer(content: str) -> bool:
        """Return True when ``content`` reads like a question or a how-to answer.

        The check is a keyword heuristic: a trailing question mark (ASCII or
        full-width) or any of a fixed list of question/troubleshooting words.
        """
        text = str(content or "").strip().lower()
        if not text:
            return False
        patterns = (
            # One character class covers both "?" and U+FF1F; a bare "?$" is
            # not a valid regular expression and raises re.error.
            r"[?\uff1f]$",
            r"怎么",
            r"如何",
            r"为啥",
            r"为什么",
            r"能不能",
            r"可以吗",
            r"报错",
            r"试试",
            r"先",
            r"然后",
            r"配置",
            r"日志",
            r"接口",
            r"原因",
        )
        return any(re.search(pattern, text, flags=re.IGNORECASE) for pattern in patterns)

    @staticmethod
    def _extract_topic_tokens(content: str) -> set[str]:
        """Extract lower-cased topic tokens from ``content``.

        Tokens are ASCII words of three or more characters (letters, digits,
        ``_`` and ``-``) plus any entry of a fixed keyword list that occurs
        as a substring.
        """
        text = str(content or "").lower()
        # "-" is placed last in the class so it is a literal; the backslash is
        # intentionally not part of a token.
        tokens = set(re.findall(r"[a-z0-9_-]{3,}", text))
        keywords = (
            "openclaw",
            "qdrant",
            "ollama",
            "docker",
            "python",
            "api",
            "插件",
            "机器人",
            "模型",
            "日志",
            "配置",
            "报错",
            "部署",
            "联网",
            "图片",
            "记忆",
            "群聊",
            "dota",
            "战绩",
        )
        tokens.update(keyword for keyword in keywords if keyword in text)
        return tokens

    @staticmethod
    def _clean_display_name(sender_name: str) -> str:
        """Reduce a display name to at most 8 CJK/alphanumeric/underscore chars."""
        text = str(sender_name or "").strip()
        if not text:
            return ""
        text = re.sub(r"\s+", "", text)
        text = re.sub(r"[^\u4e00-\u9fffA-Za-z0-9_]", "", text)
        return text[:8]

    # ------------------------------------------------------------------
    # Member prompts
    # ------------------------------------------------------------------

    @staticmethod
    def _build_member_profile_brief_prompt(member_context: Dict) -> str:
        """Build the always-on "light profile" of the current speaker.

        1. It does not require an @-mention or strong direction, so every
           reply can carry a basic understanding of the speaker.
        2. Only a handful of stable fields are kept so the profile does not
           drown out the actual question.
        3. Finer-grained member memory is handled by the on-demand prompts.

        Returns ``""`` when there is no context or none of the fields is set.
        """
        if not member_context:
            return ""
        text = ContextBuilder._text
        labeled = ContextBuilder._labeled
        stringify = ContextBuilder._stringify_items
        meta = member_context.get("meta", {}) or {}

        details = [
            labeled("成员摘要", text(member_context.get("summary_text", ""))),
            labeled("互动风格", text(member_context.get("interaction_style", ""))),
            labeled("偏好回复方式", text(member_context.get("response_style_hint", ""))),
            labeled("长期兴趣", stringify(member_context.get("topics_of_interest", []), 3)),
            labeled("近期关注", stringify(member_context.get("recent_focus", []), 2)),
            labeled("技能侧重点", stringify(meta.get("skill_profile", []), 2)),
            labeled("回复偏好", stringify(meta.get("long_term_reply_preferences", []), 2)),
        ]
        details = [line for line in details if line]
        if not details:
            # A header and footer with nothing in between is pure noise.
            return ""
        return "\n".join(
            [
                "当前发言人轻画像:",
                *details,
                "这些信息只用于帮助理解提问方式和回答切口,不要像在背档案。",
            ]
        )

    @staticmethod
    def _build_member_memory_prompt(
        member_context: Dict, focus_lines: List[str] | None = None
    ) -> str:
        """Build the detailed member-memory block.

        Returns a fixed "no stable profile" sentence when ``member_context``
        is empty. Each field contributes a line only when it has a value.
        """
        if not member_context:
            return "暂无稳定成员画像。"
        text = ContextBuilder._text
        labeled = ContextBuilder._labeled
        stringify = ContextBuilder._stringify_items
        meta = member_context.get("meta", {}) or {}
        focus = ";".join(str(line) for line in (focus_lines or [])[:4])

        lines = [
            labeled("成员摘要", text(member_context.get("summary_text", ""))),
            labeled("互动风格", text(member_context.get("interaction_style", ""))),
            labeled("回复偏好", text(member_context.get("response_style_hint", ""))),
            labeled("本次相关记忆", focus),
            labeled("长期主题", stringify(member_context.get("topics_of_interest", []), 5)),
            labeled("近期关注", stringify(member_context.get("recent_focus", []), 4)),
            labeled("常见发言场景", stringify(meta.get("common_scenarios", []), 4)),
            labeled("技能侧重点", stringify(meta.get("skill_profile", []), 5)),
            labeled("处理问题方式", stringify(meta.get("problem_solving_profile", []), 4)),
            labeled("稳定特征", stringify(meta.get("stable_traits", []), 4)),
            labeled("习惯模式", stringify(meta.get("habit_patterns", []), 4)),
            labeled("表达标记", stringify(meta.get("expression_profile", []), 4)),
            labeled("有效接话点", stringify(meta.get("reply_entry_profile", []), 4)),
            labeled("长期回复偏好", stringify(meta.get("long_term_reply_preferences", []), 4)),
            labeled("近期状态", stringify(meta.get("recent_state", []), 4)),
            labeled("气质倾向", text(meta.get("temperament_tendency", ""))),
            labeled("群内角色", text(meta.get("group_role", ""))),
            labeled("回复禁忌", stringify(meta.get("reply_taboos", []), 3)),
        ]
        return ContextBuilder._join_lines(lines)

    @staticmethod
    def _build_at_member_profile_prompt(
        member_context: Dict,
        focus_lines: List[str] | None = None,
        is_at: bool = False,
        is_directed: bool = False,
    ) -> str:
        """Build the high-priority member profile for @ / directed messages.

        Only emitted when the bot was explicitly mentioned or strongly
        addressed, to avoid over-applying a persona in ordinary chatter.
        """
        if not (is_at or is_directed):
            return ""
        if not member_context:
            return "本次是对方点名发起,但暂无稳定画像,按自然群友口吻短回复。"
        text = ContextBuilder._text
        labeled = ContextBuilder._labeled
        stringify = ContextBuilder._stringify_items
        meta = member_context.get("meta", {}) or {}
        focus = ";".join(str(line) for line in (focus_lines or [])[:3]).strip()

        lines = [
            "本次为点名互动,优先参考该成员画像后再回复:",
            labeled("成员摘要", text(member_context.get("summary_text", ""))),
            labeled("互动风格", text(member_context.get("interaction_style", ""))),
            labeled("偏好回复方式", text(member_context.get("response_style_hint", ""))),
            labeled("近期相关记忆", focus),
            labeled("长期兴趣", stringify(member_context.get("topics_of_interest", []), 4)),
            labeled("禁忌提醒", stringify(meta.get("reply_taboos", []), 3)),
            "语气要像熟悉的群友,短句、自然,不要客服腔。",
        ]
        return ContextBuilder._join_lines(lines)

    @staticmethod
    def _stringify_items(items: List | str, limit: int) -> str:
        """Render up to ``limit`` items as a de-duplicated ``", "`` list.

        A string is returned stripped. Dict items contribute their first
        non-empty ``name`` / ``label`` / ``value`` / ``text`` entry. ``None``
        and empty input yield ``""``.
        """
        if not items:
            return ""
        if isinstance(items, str):
            return items.strip()
        values: List[str] = []
        for item in items[:limit]:
            if isinstance(item, dict):
                raw = (
                    item.get("name")
                    or item.get("label")
                    or item.get("value")
                    or item.get("text")
                    or ""
                )
            else:
                raw = item or ""
            value = str(raw).strip()
            if value and value not in values:
                values.append(value)
        return ", ".join(values)

    # ------------------------------------------------------------------
    # Memory / facts prompts
    # ------------------------------------------------------------------

    @staticmethod
    def _build_vector_memory_prompt(vector_memories: List[Dict]) -> str:
        """Render the first two vector memories as ``[type] summary`` lines."""
        if not vector_memories:
            return ""
        lines = []
        for item in vector_memories[:2]:
            summary = (
                item.get("content_summary")
                or item.get("summary_text")
                or item.get("text")
                or ""
            )
            if summary:
                lines.append(f"[{item.get('memory_type', 'memory')}] {summary}")
        return "\n".join(lines)

    @staticmethod
    def _build_social_memory_prompt(social_memory: Dict) -> str:
        """Return the stripped ``prompt`` field of ``social_memory``."""
        return str((social_memory or {}).get("prompt", "") or "").strip()

    @staticmethod
    def _build_group_facts_prompt(group_facts: Dict) -> str:
        """Return the stripped ``prompt`` field of ``group_facts``."""
        return str((group_facts or {}).get("prompt", "") or "").strip()

    # ------------------------------------------------------------------
    # Group prompts
    # ------------------------------------------------------------------

    @staticmethod
    def _build_group_long_memory_prompt(group_profile: Dict) -> str:
        """Build the always-on "long-term group background" block.

        1. A short block is given every time so the model knows what the
           group usually talks about and which style fits.
        2. The full group profile is not included, to keep generic style
           descriptions from eating the token budget.
        3. Finer group facts and relations are supplied by other prompts.

        Returns ``""`` when no background field has a value.
        """
        if not group_profile:
            return ""
        labeled = ContextBuilder._labeled
        stringify = ContextBuilder._stringify_items
        structured = group_profile.get("group_memory_structured", {}) or {}
        summary = ContextBuilder._compact_group_summary(
            str(group_profile.get("group_memory_summary", "") or ""),
            max_chars=220,
            max_sentences=4,
        )
        try:
            summary_days = int(group_profile.get("group_memory_summary_days", 0) or 0)
        except (TypeError, ValueError):
            # A malformed counter only costs this one optional line.
            summary_days = 0

        details = [
            f"摘要观察窗口:最近 {summary_days} 份群总结" if summary_days > 0 else "",
            labeled("稳定主题", stringify(structured.get("stable_topics", []), 4)),
            labeled("近期重点", stringify(structured.get("recent_key_points", []), 3)),
            labeled("未决问题", stringify(structured.get("unresolved_points", []), 3)),
            labeled("共享资源/线索", stringify(structured.get("resource_clues", []), 3)),
            labeled("角色线索", stringify(structured.get("role_hints", []), 3)),
            labeled("长期摘要", summary),
            labeled("常聊方向", stringify(group_profile.get("knowledge_focus", []), 4)),
            labeled(
                "历史社交风格",
                ContextBuilder._build_style_summary(group_profile.get("group_memory_style", {})),
            ),
        ]
        details = [line for line in details if line]
        if not details:
            return ""
        return "\n".join(["群长期背景:", *details])

    @staticmethod
    def _build_group_profile_prompt(group_profile: Dict) -> str:
        """Build the group profile / style block.

        The structured summary fields are expanded explicitly:
        1. an LLM consumes clear fields better than re-guessing them from
           markdown prose;
        2. stable topics, recent points and unresolved questions serve
           different decisions and are less useful when merged;
        3. the compacted summary sentence is still kept as a human-readable
           fallback for when the structured fields are missing.
        """
        if not group_profile:
            return "当前群没有特殊知识域限制。"
        labeled = ContextBuilder._labeled
        stringify = ContextBuilder._stringify_items
        structured = group_profile.get("group_memory_structured", {}) or {}
        summary = ContextBuilder._compact_group_summary(
            str(group_profile.get("group_memory_summary", "") or "")
        )
        memory_style = ContextBuilder._build_style_summary(
            group_profile.get("group_memory_style", {})
        )

        lines = [
            f"群模式:{group_profile.get('mode', 'social')}",
            f"知识域偏向:{group_profile.get('knowledge_domain', 'general')}(仅作理解倾向,不是每次都要显式提到)",
            f"配置知识域:{group_profile.get('configured_domain', 'general')}(仅在当前话题相关时参考)",
            f"历史推断知识域:{group_profile.get('group_memory_domain', 'general')}(弱参考)",
            f"回答风格:{group_profile.get('reply_style', '自然短句')}",
            f"互动调性:{group_profile.get('interaction_tone', '自然群友感')}",
            f"幽默强度:{group_profile.get('humor_style', '轻微')}",
            f"嘴硬程度:{group_profile.get('sharpness_style', '轻微嘴硬,不刻薄')}",
            f"表达松弛度:{group_profile.get('expressiveness_style', '克制')}",
            f"称呼强度:{group_profile.get('address_style', '低频称呼,默认直接接话')}",
            labeled("可能相关的话题背景", stringify(group_profile.get("knowledge_focus", []), 6)),
            labeled("群摘要稳定主题", stringify(structured.get("stable_topics", []), 4)),
            labeled("群摘要近期重点", stringify(structured.get("recent_key_points", []), 3)),
            labeled("群摘要未决问题", stringify(structured.get("unresolved_points", []), 3)),
            labeled("群摘要资源线索", stringify(structured.get("resource_clues", []), 3)),
            labeled("群摘要角色线索", stringify(structured.get("role_hints", []), 3)),
            labeled("群长期摘要关键句", summary),
            labeled("历史推断社交风格", memory_style),
            labeled("边界提醒", stringify(group_profile.get("topic_boundaries", []), 6)),
            labeled("人格叠加", ContextBuilder._text(group_profile.get("persona_overlay", ""))),
        ]
        return ContextBuilder._join_lines(lines)

    @staticmethod
    def _build_style_summary(style_profile: Dict) -> str:
        """Join the non-empty style fields of ``style_profile`` with ``" / "``."""
        if not style_profile:
            return ""
        keys = ("interaction_tone", "humor_style", "sharpness_style", "expressiveness_style")
        parts = [str(style_profile.get(key, "") or "").strip() for key in keys]
        # Drop empty parts first so a missing middle field cannot leave a
        # dangling "a /  / b" separator behind.
        return " / ".join(part for part in parts if part).strip(" /")

    @staticmethod
    def _compact_group_summary(
        summary_text: str, max_chars: int = 420, max_sentences: int = 6
    ) -> str:
        """Shrink a long group summary to at most ``max_chars`` characters.

        Text that already fits is returned with whitespace collapsed. Longer
        text is split into sentences; the first and last sentence are always
        kept and the remaining slots (up to ``max_sentences``) go to the
        highest-scoring sentences, so that conclusion / risk / action /
        config statements survive instead of being lost to a plain cut.
        """
        text = str(summary_text or "").strip()
        if not text:
            return ""
        text = re.sub(r"\s+", " ", text).strip()
        if len(text) <= max_chars:
            return text

        # ASCII punctuation plus the full-width forms common in Chinese text.
        trim_chars = " ,;。.!?:\uff0c\uff1b\uff01\uff1f\uff1a"
        sentences = [
            part.strip(trim_chars)
            for part in re.split(r"[。!?;\uff01\uff1f\uff1b\n]+", text)
            if part.strip()
        ]
        if not sentences:
            return text[: max_chars - 3] + "..."

        key_patterns = (
            r"结论|核心|重点|关键|建议|方案|步骤|原因|影响|风险|注意|问题|异常|报错|故障|超时|阻塞",
            r"配置|参数|阈值|策略|限制|回退|优化|修复|排查|上线|回滚|依赖|版本|兼容",
        )
        last = len(sentences) - 1
        scored: List[tuple[int, int]] = []
        for idx, sentence in enumerate(sentences):
            score = sum(
                3 for pattern in key_patterns if re.search(pattern, sentence, flags=re.IGNORECASE)
            )
            if re.search(r"\d", sentence):
                score += 1
            if 8 <= len(sentence) <= 80:
                score += 1
            if idx in (0, last):
                score += 1
            scored.append((score, idx))

        # Highest score first; ties keep document order.
        chosen_indexes = {0, last}
        for _, idx in sorted(scored, key=lambda pair: (-pair[0], pair[1])):
            chosen_indexes.add(idx)
            if len(chosen_indexes) >= max_sentences:
                break

        merged = ";".join(
            sentences[idx] for idx in sorted(chosen_indexes) if sentences[idx]
        ).strip(";")
        if len(merged) <= max_chars:
            return merged
        return merged[: max_chars - 3].rstrip(trim_chars) + "..."

    # ------------------------------------------------------------------
    # Quote / image prompts
    # ------------------------------------------------------------------

    @staticmethod
    def _build_quote_prompt(quote_context: Dict) -> str:
        """Describe the quoted message the user is replying to.

        Returns ``""`` when the quote carries neither a body nor a title and
        nothing beyond the two fixed header lines would be emitted.
        """
        if not quote_context:
            return ""
        quote_type = quote_context.get("quote_type_label", "引用消息")
        quote_sender = (quote_context.get("quote_sender_name", "") or "").strip()
        quote_body = quote_context.get("quote_body", "") or ""
        title = quote_context.get("title", "") or ""

        lines = [
            "用户这次是在引用消息后发言。",
            f"引用类型:{quote_type}",
            f"被引用发送者:{quote_sender}" if quote_sender and quote_sender != "未知成员" else "",
            "图片附件:已附带原图" if quote_context.get("has_image_attachment") else "",
            f"引用标题:{title}" if title else "",
            f"被引用内容:{quote_body}" if quote_body else "",
        ]
        payload = [line for line in lines if line]
        # Fallback: with only the quote type left there is nothing usable.
        if len(payload) <= 2 and not quote_body and not title:
            return ""
        return "\n".join(payload)

    @staticmethod
    def _build_image_prompt(image_context: Dict) -> str:
        """Describe the most recent group image attached as context."""
        if not image_context:
            return ""
        hint = image_context.get("hint")
        lines = [
            "已附带最近一张群图片作为上下文。",
            # "or" also covers an explicit None/"" so "None" is never printed.
            f"图片发送者:{image_context.get('sender_name') or '未知成员'}",
            f"图片说明:{hint}" if hint else "",
        ]
        return ContextBuilder._join_lines(lines)

    @staticmethod
    def _build_image_safety_prompt(image_safety: Dict) -> str:
        """Warn the model when the user seems to comment on an unseen image.

        Returns ``""`` unless ``image_safety["suspected"]`` is truthy. When
        visual context is attached a short permissive note is returned;
        otherwise the model is told not to pretend it saw the picture.
        """
        if not image_safety or not image_safety.get("suspected"):
            return ""
        if image_safety.get("has_visual_context"):
            return "当前发言疑似是在评论图片,但本次已附带图片上下文,可以基于图片谨慎理解。"
        reason = str(image_safety.get("reason", "") or "").strip()
        lines = [
            "当前发言疑似是在评论图片,但你这次没有看到图片本身。",
            f"原因:{reason}" if reason else "",
            "不要假装看过图,不要直接评价画面细节、人物状态、构图、文字内容或颜色元素。",
            "如果要回,只能轻微承认信息不足,或请对方引用图片/补一句文字说明,再继续。",
        ]
        return ContextBuilder._join_lines(lines)